Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
a7522c76
Commit
a7522c76
authored
Jul 23, 2014
by
bow
Browse files
Add initial wrapper for seqtk
parent
f15bfd77
Changes
1
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Seqtk.scala
0 → 100644
View file @
a7522c76
package
nl.lumc.sasc.biopet.function
import
java.io.File
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Common base for wrappers around the seqtk command line tool.
 *
 * Concrete subclasses supply the actual sub-command and its arguments; this
 * class only resolves the executable and the version-detection settings.
 */
abstract class Seqtk extends BiopetCommandLineFunction {

  // Executable is looked up under the "exe" key of the "seqtk" submodule,
  // falling back to plain "seqtk" when nothing is configured.
  executable = config("exe", submodule = "seqtk", default = "seqtk")

  /**
   * Command used for version detection: the bare executable.
   * NOTE(review): presumably seqtk prints its version in the usage text when
   * run without arguments -- confirm against the installed seqtk.
   */
  override def versionCommand = executable

  /** Pattern whose single capture group is the text following "Version: ". */
  override val versionRegex = """Version: (.*)""".r
}
/**
 * Wrapper for the `seqtk seq` sub-command.
 *
 * Every option below is initialised from the configuration under a key equal
 * to its seqtk flag letter. The bracketed values in the option descriptions
 * are the defaults as quoted from the seqtk usage text.
 *
 * @param root the [[Configurable]] this function is created from
 *             (presumably the parent used to resolve config values -- confirm)
 */
class SeqtkSeq(val root: Configurable) extends Seqtk {

  @Input(doc = "Input file (FASTQ or FASTA)")
  var input: File = _

  @Output(doc = "Output file")
  var output: File = _

  /** -q: mask bases with quality lower than INT [0] */
  var q: Option[Int] = config("q")

  /** -n: masked bases converted to CHAR; 0 for lowercase [0] */
  var n: String = config("n")

  /** -l: number of residues per line; 0 for 2^32-1 [0] */
  var l: Option[Int] = config("l")

  /** -Q: quality shift: ASCII-INT gives base quality [33] */
  var Q: Option[Int] = config("Q")

  /** -s: random seed (effective with -f) [11] */
  var s: Option[Int] = config("s")

  /**
   * -f: sample FLOAT fraction of sequences [1]
   *
   * NOTE(review): the option is described as a FLOAT fraction but is typed as
   * Option[Int] here, so fractional values cannot be expressed. Confirm
   * whether Option[Double] was intended before changing the public type.
   */
  var f: Option[Int] = config("f")

  /** -M: mask regions in BED or name list FILE [null] */
  var M: File = config("M")

  /** -L: drop sequences with length shorter than INT [0] */
  var L: Option[Int] = config("L")

  /** -c: mask complement region (effective with -M) */
  var c: Boolean = config("c")

  /** -r: reverse complement */
  var r: Boolean = config("r")

  /** -A: force FASTA output (discard quality) */
  var A: Boolean = config("A")

  /** -C: drop comments at the header lines */
  var C: Boolean = config("C")

  /** -N: drop sequences containing ambiguous bases */
  var N: Boolean = config("N")

  /** -1: output the 2n-1 reads only (config key "1") */
  var flag1: Boolean = config("1")

  /** -2: output the 2n reads only (config key "2") */
  var flag2: Boolean = config("2")

  /** -V: shift quality by '(-Q) - 33' */
  var V: Boolean = config("V")

  /**
   * Assembles the command line: executable, the `seq` sub-command, the
   * value-carrying options, the boolean switches, the input file and finally
   * a shell redirect of standard output into the output file.
   *
   * The fragments are concatenated without any separator, in the order listed.
   */
  def cmdLine: String = {
    Seq(
      required(executable),
      " seq ",
      // options that carry a value
      optional("-q", q),
      optional("-n", n),
      optional("-l", l),
      optional("-Q", Q),
      optional("-s", s),
      optional("-f", f),
      optional("-M", M),
      optional("-L", L),
      // boolean switches
      conditional(c, "-c"),
      conditional(r, "-r"),
      conditional(A, "-A"),
      conditional(C, "-C"),
      conditional(N, "-N"),
      conditional(flag1, "-1"),
      conditional(flag2, "-2"),
      conditional(V, "-V"),
      // input, then redirect stdout to the output file
      required(input),
      " > ",
      required(output)
    ).mkString
  }

  /**
   * Calculates the offset required for the -Q flag for format conversion (-V flag set).
   * This is required since seqtk computes the encoding offset indirectly from the input
   * and output offsets.
   *
   * @param  inQualOffset  ASCII offset of the input file encoding
   * @param  outQualOffset ASCII offset of the output file encoding
   * @return               the value to be used with the -Q flag with -V set
   */
  def calcQForV(inQualOffset: Int, outQualOffset: Int): Int = {
    // How far the requested output encoding sits from the offset of 33.
    val outShift = outQualOffset - 33
    inQualOffset - outShift
  }
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment