Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
57b31904
Commit
57b31904
authored
Nov 02, 2016
by
Sander Bollen
Browse files
Merge branch 'feature-fastq_filter' into 'develop'
Fix for BIOPET-402 See merge request !467
parents
b6382747
942ad5e7
Changes
5
Hide whitespace changes
Inline
Side-by-side
biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala
View file @
57b31904
...
...
@@ -31,6 +31,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl
.
lumc
.
sasc
.
biopet
.
tools
.
ExtractAlignedFastq
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
FastqSplitter
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
FastqSync
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
FastqFilter
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
FindRepeatsPacBio
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
FindOverlapMatch
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
GvcfToBed
,
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/FastqFilter.scala
0 → 100644
View file @
57b31904
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
htsjdk.samtools.fastq.
{
AsyncFastqWriter
,
BasicFastqWriter
,
FastqReader
}
import
nl.lumc.sasc.biopet.utils.ToolCommand
import
scala.util.matching.Regex
import
scala.collection.JavaConversions._
/**
 * Command line tool that copies a fastq file to a new fastq file, optionally
 * keeping only the records whose read ID matches a regular expression.
 *
 * Created by pjvan_thof on 28-10-16.
 */
object FastqFilter extends ToolCommand {

  /**
   * Args for commandline program
   * @param inputFile input fastq file
   * @param outputFile output fastq file
   * @param idRegex when defined, only records whose ID contains a match for
   *                this regex are written; when empty every record is written
   */
  case class Args(inputFile: File = null,
                  outputFile: File = null,
                  idRegex: Option[Regex] = None) extends AbstractArgs

  /**
   * Command line parser that fills an [[Args]] instance.
   *
   * `-o` and `--idRegex` are declared `unbounded()`, so they may be repeated on
   * the command line; each occurrence overwrites the previous value, which
   * means the last one given is the one that is used.
   */
  class OptParser extends AbstractOptParser {
    opt[File]('I', "inputFile").required().valueName("<file>").action { (x, c) =>
      c.copy(inputFile = x)
    }.text("Path to input file")

    opt[File]('o', "output").required().unbounded().valueName("<file>").action { (x, c) =>
      c.copy(outputFile = x)
    }.text("Path to output file")

    // The value is a regular expression, not a file, so the usage text says so.
    opt[String]("idRegex").unbounded().valueName("<regex>").action { (x, c) =>
      c.copy(idRegex = Some(x.r))
    }.text("Regex to match ID")
  }

  /**
   * Decides whether a record should be written to the output.
   *
   * The ID of a record is the part of its header before the first space. The
   * regex only has to match somewhere inside the ID (`findFirstIn`), it does
   * not need to cover the whole ID.
   *
   * @param idRegex optional filter; `None` keeps every record
   * @param readHeader full header of the fastq record
   * @return true when the record should be kept
   */
  private def matchesId(idRegex: Option[Regex], readHeader: String): Boolean = {
    val id = readHeader.takeWhile(_ != ' ')
    idRegex.forall(_.findFirstIn(id).isDefined)
  }

  /**
   * Program entry point: parses the arguments, streams the input fastq and
   * writes the records that pass the ID filter to the output fastq.
   *
   * @param args raw command line arguments
   * @throws IllegalArgumentException when the arguments can not be parsed
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val cmdArgs: Args = argsParser.parse(args, Args()).getOrElse(throw new IllegalArgumentException)

    logger.info("Start")

    val reader = new FastqReader(cmdArgs.inputFile)
    try {
      // 10000 is the second constructor argument of AsyncFastqWriter (its queue size in htsjdk).
      val writer = new AsyncFastqWriter(new BasicFastqWriter(cmdArgs.outputFile), 10000)
      try {
        // Long counters: an Int would overflow on inputs with more than ~2.1 billion reads.
        var total = 0L
        var kept = 0L

        // Plain java.util.Iterator calls, so no implicit Java -> Scala conversion is needed.
        val records = reader.iterator()
        while (records.hasNext) {
          val record = records.next()
          if (matchesId(cmdArgs.idRegex, record.getReadHeader)) {
            writer.write(record)
            kept += 1
          }
          total += 1
          if (total % 100000 == 0) logger.info(s"Total reads: $total, reads left: $kept")
        }

        logger.info(s"Total reads: $total, reads left: $kept")
      } finally {
        // Closed even when reading or writing fails, so the output handle is not leaked.
        writer.close()
      }
    } finally {
      reader.close()
    }

    logger.info("Done")
  }
}
biopet-tools/src/test/resources/paired01_post_filter.fq
0 → 100644
View file @
57b31904
@r01_filter hello
A
+
H
@r03_filter
G
+
H
biopet-tools/src/test/resources/paired01_pre_filter.fq
0 → 100644
View file @
57b31904
@r01_filter hello
A
+
H
@r02
T
+
I
@r03_filter
G
+
H
@r04
C
+
I
@r05
A
+
H
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/FastqFilterTest.scala
0 → 100644
View file @
57b31904
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.nio.file.Paths
import
org.scalatest.Matchers
import
org.scalatest.mock.MockitoSugar
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
scala.io.Source
/**
 * Test for [[FastqFilter]]: runs the tool on a small fastq resource and
 * compares the produced file with the expected, already filtered, resource.
 *
 * Created by ahbbollen on 27-8-15.
 */
class FastqFilterTest extends TestNGSuite with MockitoSugar with Matchers {

  import FastqFilter._

  /**
   * Resolves a classpath resource to a file system path.
   *
   * @param p resource name, starting with a slash
   * @return path of the resource as a string
   */
  private def resourcePath(p: String): String = {
    Paths.get(getClass.getResource(p).toURI).toString
  }

  /**
   * Reads every line of a file and closes the underlying source afterwards,
   * also when reading fails.
   *
   * @param file file to read
   * @return all lines of the file, in order
   */
  private def linesOf(file: File): List[String] = {
    val source = Source.fromFile(file)
    try source.getLines().toList
    finally source.close()
  }

  val preFilterFastq: String = resourcePath("/paired01_pre_filter.fq")
  val postFilterFastq: String = resourcePath("/paired01_post_filter.fq")

  // The result type is pinned to Unit: TestNG ignores @Test methods that return
  // a value, so an inferred non-Unit type would silently skip this test.
  @Test
  def testMain(): Unit = {
    val temp = File.createTempFile("out", ".fastq")
    temp.deleteOnExit()

    // Keep only the reads whose ID (header up to the first space) ends in "_filter".
    val args = Array("-I", preFilterFastq, "-o", temp.getAbsolutePath, "--idRegex", "_filter$")
    main(args)

    linesOf(temp) shouldBe linesOf(new File(postFilterFastq))
  }
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment