Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
8a830d49
Commit
8a830d49
authored
Jul 28, 2014
by
bow
Browse files
Merge branch 'bow'
parents
16a08dfd
d3a453a1
Changes
6
Show whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Seqtk.scala
0 → 100644
View file @
8a830d49
/**
* Copyright (c) 2014 Leiden University Medical Center
*
* @author Wibowo Arindrarto
*/
package
nl.lumc.sasc.biopet.function
import
java.io.File
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Common base for the seqtk command line wrappers.
 *
 * Sets up the executable from the configuration and declares the command and
 * pattern used for version detection.
 */
abstract class Seqtk extends BiopetCommandLineFunction {

  // Executable is read from config key "exe" of the "seqtk" submodule; "seqtk" is the declared default.
  executable = config("exe", default = "seqtk", submodule = "seqtk")

  /** Version command: the bare executable, without any argument. */
  override def versionCommand = executable

  /** Pattern with one capture group holding the text that follows "Version: ". */
  override val versionRegex = "Version: (.*)".r
}
/**
 * Command line wrapper for the `seqtk seq` subcommand.
 *
 * The option set was written against seqtk version 1.0-r63-dirty. Every option
 * is initialised from the configuration key carrying the same name as the
 * seqtk flag. The bracketed values in the option descriptions are the defaults
 * quoted from the seqtk usage text.
 */
class SeqtkSeq(val root: Configurable) extends Seqtk {

  /** Input file (FASTQ or FASTA). */
  @Input(doc = "Input file (FASTQ or FASTA)")
  var input: File = _

  /** Output file; the command's standard output is redirected into it. */
  @Output(doc = "Output file")
  var output: File = _

  /** -q: mask bases with quality lower than INT [0] */
  var q: Option[Int] = config("q")

  /** -n: masked bases converted to CHAR; 0 for lowercase [0] */
  var n: String = config("n")

  /** -l: number of residues per line; 0 for 2^32-1 [0] */
  var l: Option[Int] = config("l")

  /** -Q: quality shift: ASCII-INT gives base quality [33] */
  var Q: Option[Int] = config("Q")

  /** -s: random seed (effective with -f) [11] */
  var s: Option[Int] = config("s")

  /**
   * -f: sample FLOAT fraction of sequences [1]
   *
   * NOTE(review): the option is described as a FLOAT but the field is typed
   * Option[Int] -- confirm whether fractional values are meant to be supported.
   */
  var f: Option[Int] = config("f")

  /** -M: mask regions in BED or name list FILE [null] */
  var M: File = config("M")

  /** -L: drop sequences with length shorter than INT [0] */
  var L: Option[Int] = config("L")

  /** -c: mask complement region (effective with -M) */
  var c: Boolean = config("c")

  /** -r: reverse complement */
  var r: Boolean = config("r")

  /** -A: force FASTA output (discard quality) */
  var A: Boolean = config("A")

  /** -C: drop comments at the header lines */
  var C: Boolean = config("C")

  /** -N: drop sequences containing ambiguous bases */
  var N: Boolean = config("N")

  /** -1: output the 2n-1 reads only (read from config key "1") */
  var flag1: Boolean = config("1")

  /** -2: output the 2n reads only (read from config key "2") */
  var flag2: Boolean = config("2")

  /** -V: shift quality by '(-Q) - 33' */
  var V: Boolean = config("V")

  /**
   * Assembles the command line: the executable, the `seq` subcommand, the
   * valued options, the switches, the input file and finally a redirection of
   * standard output to the output file.
   */
  def cmdLine = {
    // The fragments are listed in the exact order in which they are emitted.
    val fragments = Seq(
      required(executable),
      " seq ",
      // options taking a value
      optional("-q", q),
      optional("-n", n),
      optional("-l", l),
      optional("-Q", Q),
      optional("-s", s),
      optional("-f", f),
      optional("-M", M),
      optional("-L", L),
      // plain switches
      conditional(c, "-c"),
      conditional(r, "-r"),
      conditional(A, "-A"),
      conditional(C, "-C"),
      conditional(N, "-N"),
      conditional(flag1, "-1"),
      conditional(flag2, "-2"),
      conditional(V, "-V"),
      // input file, then shell redirection into the output file
      required(input),
      " > ",
      required(output))
    fragments.mkString
  }

  /**
   * Computes the value to pass to -Q when converting between quality encodings
   * with the -V flag set. seqtk derives the encoding shift indirectly from the
   * input and output offsets, so the -Q value has to be adjusted accordingly.
   *
   * @param inQualOffset ASCII offset of the input file encoding
   * @param outQualOffset ASCII offset of the output file encoding
   * @return the value to be used with the -Q flag with -V set
   */
  def calcQForV(inQualOffset: Int, outQualOffset: Int): Int =
    // same value as inQualOffset - (outQualOffset - 33)
    inQualOffset - outQualOffset + 33
}
extras/git.pre-commit
0 → 100755
View file @
8a830d49
#!/usr/bin/env python
# Adapted from: http://tech.yipit.com/2011/11/16/183772396/
# Changes by Wibowo Arindrarto
# Changes:
# - Allow code modification by linters to be committed
# - Updated CHECKS
# - Python 3 calls + code style updates
#
# Usage: save this file into your .git/hooks directory as `pre-commit`
# and set it to executable
import
os
import
re
import
subprocess
import
sys
# Matches a `git status --porcelain` line that starts with an "M" or "A" status
# letter followed by whitespace; the rest of the line is captured as `name`.
modified = re.compile(r"^[MA]\s+(?P<name>.*)$")

# One entry per external tool that is run on the files of a commit.
# Keys of an entry:
#   exe            - executable that must be found with `which` before any check runs
#   output         - message printed before the check starts
#   command        - shell command template; "%s" is replaced by the file name
#   match_files    - regexes; the check only runs on files matching one of them
#   print_filename - whether reported output lines are prefixed with the file name
#   commit_changes - whether the file is `git add`ed when the tool prints nothing
CHECKS = [
    dict(
        exe="scalariform",
        output="Formatting code with scalariform ...",
        command="scalariform -s=2.11.1 -p=scalariformStyle.properties --quiet %s",
        match_files=[".*scala$"],
        print_filename=False,
        commit_changes=True,
    ),
]
def matches_file(file_name, match_files):
    """Tell whether `file_name` matches at least one of the given patterns.

    Each entry of `match_files` is a regular expression that is matched
    against the start of `file_name`. Returns False when `match_files` is
    empty.
    """
    for pattern in match_files:
        if re.match(pattern, file_name) is not None:
            return True
    return False
def check_files(files, check):
    """Run one check on every applicable file and report its output.

    `check` is a dict with the keys "output", "command", "print_filename"
    and "commit_changes", plus the optional regex lists "match_files" and
    "ignore_files". A file is skipped when "match_files" is present and none
    of its patterns match, or when "ignore_files" is present and one of its
    patterns matches.

    For each remaining file the command template is filled in with the file
    name and run through the shell. Any output on stdout or stderr counts as
    a failure and is printed; a silent run causes the file to be `git add`ed
    when "commit_changes" is set.

    Returns 1 if the command printed anything for at least one file, else 0.
    """
    result = 0
    print(check["output"])
    for file_name in files:
        if "match_files" in check and \
                not matches_file(file_name, check["match_files"]):
            continue
        if "ignore_files" in check and \
                matches_file(file_name, check["ignore_files"]):
            continue

        # NOTE(review): the file name is interpolated unquoted into a shell
        # command, so names with spaces or shell metacharacters will misbehave.
        # universal_newlines=True makes communicate() return text instead of
        # bytes on Python 3, so the lines printed below are not rendered as
        # b'...' literals. Python 2 behaviour is unchanged.
        process = subprocess.Popen(check["command"] % file_name,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True,
                                   universal_newlines=True)
        out, err = process.communicate()

        if out or err:
            if check["print_filename"]:
                prefix = "\t%s:" % file_name
            else:
                prefix = "\t"
            output_lines = ["%s%s" % (prefix, line)
                            for line in out.splitlines()]
            print("\n".join(output_lines))
            if err:
                print(err)
            result = 1
        elif check["commit_changes"]:
            # The tool ran silently: stage the file so that any change made by
            # the tool becomes part of the commit.
            p = subprocess.Popen(["git", "add", file_name],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            p.communicate()
    return result
def main(all_files):
    """Run every check in CHECKS and exit with the combined result.

    When `all_files` is true, every file found below the current directory
    is checked. Otherwise only the files that `git status --porcelain`
    reports with an "M" or "A" status letter in the first column are checked.

    The function never returns: it ends with `sys.exit`, using status 1 when
    a required executable is missing or when any check reported output, and
    0 otherwise.
    """
    # Check that the required linters and code checkers are all present
    for check in CHECKS:
        p = subprocess.Popen(["which", check["exe"]],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, err = p.communicate()
        if not out:
            print("Required commit hook executable '%s' not found."
                  % check["exe"])
            sys.exit(1)

    # Stash any changes to the working tree that are not going to be committed
    subprocess.call(["git", "stash", "-u", "--keep-index"],
                    stdout=subprocess.PIPE)

    files = []
    if all_files:
        for root, dirs, file_names in os.walk("."):
            for file_name in file_names:
                files.append(os.path.join(root, file_name))
    else:
        # Read the status as text: on Python 3 the default byte output cannot
        # be matched against the str pattern `modified` (TypeError).
        p = subprocess.Popen(["git", "status", "--porcelain"],
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
        out, err = p.communicate()
        for line in out.splitlines():
            match = modified.match(line)
            if match:
                files.append(match.group("name"))

    result = 0
    for check in CHECKS:
        # Keep a failure from an earlier check even if a later one passes.
        result = check_files(files, check) or result

    # Strategy:
    #   - Check if the linters made any changes
    #   - If there are no changes, pop the stash and commit
    #   - Otherwise:
    #       - Stash the change
    #       - Pop stash@{1}
    #       - Checkout stash@{0}
    #       - Drop stash@{0} (cannot pop directly since stash may conflict)
    #       - Commit
    # This is because the initial stash will conflict with any possible
    # changes made by the linters
    p = subprocess.Popen(["git", "status", "--porcelain"],
                         stdout=subprocess.PIPE)
    out, err = p.communicate()
    if not out.strip():
        subprocess.call(["git", "stash", "pop"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        subprocess.call(["git", "stash"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        subprocess.call(["git", "stash", "pop", "--quiet", "--index",
                         "stash@{1}"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        subprocess.call(["git", "checkout", "stash", "--", "."],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        subprocess.call(["git", "stash", "drop"],
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    sys.exit(result)
if __name__ == "__main__":
    # Passing "--all-files" as the first argument checks every file below the
    # current directory; without it only the files picked up from
    # `git status` are checked.
    main(sys.argv[1:2] == ["--all-files"])
flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala
View file @
8a830d49
...
...
@@ -19,6 +19,9 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction {
@Output
(
doc
=
"Output fastq file"
)
var
fastq_output
:
File
=
_
@Output
(
doc
=
"Output statistics file"
)
var
stats_output
:
File
=
_
executable
=
config
(
"exe"
,
default
=
"cutadapt"
)
override
def
versionCommand
=
executable
+
" --version"
override
val
versionRegex
=
"""(.*)"""
.
r
...
...
@@ -49,7 +52,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction {
optional
(
"-M"
,
opt_maximum_length
)
+
// input / output
required
(
fastq_input
)
+
" > "
+
required
(
fastq_output
)
required
(
"--output"
,
fastq_output
)
+
" > "
+
required
(
stats_output
)
}
else
{
analysisName
=
getClass
.
getSimpleName
+
"-ln"
"ln -sf "
+
...
...
flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala
View file @
8a830d49
...
...
@@ -11,25 +11,25 @@ import nl.lumc.sasc.biopet.core.config._
class
Sickle
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
{
@Input
(
doc
=
"R1 input"
)
var
input_R1
:
File
=
null
var
input_R1
:
File
=
_
@Input
(
doc
=
"R2 input"
,
required
=
false
)
var
input_R2
:
File
=
null
var
input_R2
:
File
=
_
@Input
(
doc
=
"qualityType file"
,
required
=
false
)
var
qualityTypeFile
:
File
=
_
@Output
(
doc
=
"R1 output"
)
var
output_R1
:
File
=
null
var
output_R1
:
File
=
_
@Output
(
doc
=
"R2 output"
,
required
=
false
)
var
output_R2
:
File
=
null
var
output_R2
:
File
=
_
@Output
(
doc
=
"singles output"
,
required
=
false
)
var
output_singles
:
File
=
null
var
output_singles
:
File
=
_
@Output
(
doc
=
"stats output"
)
var
output_stats
:
File
=
null
var
output_stats
:
File
=
_
executable
=
config
(
"exe"
,
default
=
"sickle"
)
var
qualityType
:
String
=
config
(
"qualitytype"
)
...
...
flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
View file @
8a830d49
...
...
@@ -128,13 +128,19 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
var
R2
:
File
=
new
File
(
R2_in
)
if
(!
skipClip
)
{
// Adapter clipping
val
cutadapt_R1
=
new
Cutadapt
(
this
)
if
(!
skipTrim
||
paired
)
cutadapt_R1
.
isIntermediate
=
true
cutadapt_R1
.
fastq_input
=
R1
cutadapt_R1
.
fastq_output
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".clip"
+
R1_ext
)
cutadapt_R1
.
stats_output
=
swapExt
(
outDir
,
R1
,
R1_ext
,
".clip.stats"
)
if
(
outputFiles
.
contains
(
"contams_R1"
))
cutadapt_R1
.
contams_file
=
outputFiles
(
"contams_R1"
)
add
(
cutadapt_R1
)
R1
=
cutadapt_R1
.
fastq_output
if
(
paired
)
{
val
cutadapt_R2
=
new
Cutadapt
(
this
)
if
(!
skipTrim
||
paired
)
cutadapt_R2
.
isIntermediate
=
true
...
...
scalariformStyle.properties
View file @
8a830d49
...
...
@@ -9,6 +9,7 @@ formatXml=true
indentLocalDefs
=
false
indentPackageBlocks
=
true
indentSpaces
=
2
placeScaladocAsterisksBeneathSecondAsterisk
=
false
preserveDanglingCloseParenthesis
=
false
preserveSpaceBeforeArguments
=
false
rewriteArrowSymbols
=
false
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment