Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
30ef4896
Commit
30ef4896
authored
Jun 02, 2016
by
bow
Browse files
Add initial hisat2 wrapper
parent
e5d79321
Changes
1
Hide whitespace changes
Inline
Side-by-side
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/hisat/Hisat2.scala
0 → 100644
View file @
30ef4896
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.extensions.hisat
import
java.io.File
import
nl.lumc.sasc.biopet.core.
{
BiopetCommandLineFunction
,
Reference
,
Version
}
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
* Extension for hisat2
*
* Based on version 2.0.4
*/
class
Hisat2
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
with
Reference
with
Version
{
// TODO: handle --sra-acc flag. This is currently unsupported by the wrapper.
@Input
(
doc
=
"Fastq file R1"
,
shortName
=
"R1"
)
var
R1
:
File
=
null
@Input
(
doc
=
"Fastq file R2"
,
shortName
=
"R2"
,
required
=
false
)
var
R2
:
Option
[
File
]
=
None
@Output
(
doc
=
"Output file SAM"
,
shortName
=
"output"
,
required
=
true
)
var
output
:
File
=
null
executable
=
config
(
"exe"
,
default
=
"hisat2"
,
freeVar
=
false
)
def
versionRegex
=
""".*hisat2-align-s version (.*)"""
.
r
override
def
versionExitcode
=
List
(
0
,
1
)
def
versionCommand
=
executable
+
" --version"
override
def
defaultCoreMemory
=
4.0
override
def
defaultThreads
=
4
/* Required */
var
hisat2Index
:
String
=
config
(
"hisat2_index"
)
/* Input options */
var
q
:
Boolean
=
config
(
"q"
,
default
=
false
)
var
qseq
:
Boolean
=
config
(
"qseq"
,
default
=
false
)
var
f
:
Boolean
=
config
(
"f"
,
default
=
false
)
var
r
:
Boolean
=
config
(
"r"
,
default
=
false
)
var
c
:
Boolean
=
config
(
"c"
,
default
=
false
)
var
skip
:
Option
[
Int
]
=
config
(
"skip"
)
var
upto
:
Option
[
Int
]
=
config
(
"upto"
)
var
trim5
:
Option
[
Int
]
=
config
(
"trim5"
)
var
trim3
:
Option
[
Int
]
=
config
(
"trim3"
)
var
phred33
:
Boolean
=
config
(
"phred33"
,
default
=
false
)
var
phred64
:
Boolean
=
config
(
"phred64"
,
default
=
false
)
var
intQuals
:
Boolean
=
config
(
"int_quals"
,
default
=
false
)
/* Alignment options */
var
nCeil
:
Option
[
String
]
=
config
(
"n_ceil"
)
var
ignoreQuals
:
Boolean
=
config
(
"ignore_quals"
,
default
=
false
)
var
nofw
:
Boolean
=
config
(
"nofw"
,
default
=
false
)
var
norc
:
Boolean
=
config
(
"norc"
,
default
=
false
)
/* Spliced alignment */
var
penCansplice
:
Option
[
Int
]
=
config
(
"pen_cansplice"
)
var
penNoncansplice
:
Option
[
Int
]
=
config
(
"pen_noncansplice"
)
var
penCanintronlen
:
Option
[
Int
]
=
config
(
"pen_canintronlen"
)
var
penNoncanintronlen
:
Option
[
Int
]
=
config
(
"pen_noncanintronlen"
)
var
minIntronlen
:
Option
[
Int
]
=
config
(
"min_intronlen"
)
var
maxIntronlen
:
Option
[
Int
]
=
config
(
"max_intronlen"
)
var
knownSplicesiteInfile
:
Option
[
File
]
=
config
(
"known_splicesite_infile"
)
var
novelSplicesiteOutfile
:
Option
[
File
]
=
config
(
"novel_splicesite_outfile"
)
var
novelSplicesiteInfile
:
Option
[
File
]
=
config
(
"novel_splicesite_infile"
)
var
noTempSplicesite
:
Boolean
=
config
(
"no_temp_splicesite"
,
default
=
false
)
var
noSplicedAlignment
:
Boolean
=
config
(
"no_spliced_alignment"
,
default
=
false
)
var
rnaStrandness
:
Option
[
String
]
=
config
(
"rna_strandness"
)
var
tmo
:
Boolean
=
config
(
"tmo"
,
default
=
false
)
var
dta
:
Boolean
=
config
(
"dta"
,
default
=
false
)
var
dtaCufflinks
:
Boolean
=
config
(
"dta_cufflinks"
,
default
=
false
)
/* Scoring */
var
ma
:
Option
[
Int
]
=
config
(
"ma"
)
var
mp
:
Option
[
String
]
=
config
(
"mp"
)
// TODO: should be (Int, Int)
var
sp
:
Option
[
String
]
=
config
(
"sp"
)
// TODO: should be (Int, Int)
var
np
:
Option
[
Int
]
=
config
(
"np"
)
var
rdg
:
Option
[
String
]
=
config
(
"rdg"
)
// TODO: should be (Int, Int)
var
rfg
:
Option
[
String
]
=
config
(
"rfg"
)
// TODO: should be (Int, Int)
var
scoreMin
:
Option
[
String
]
=
config
(
"score_min"
)
/* Reporting */
var
k
:
Option
[
Int
]
=
config
(
"k"
)
var
all
:
Option
[
Int
]
=
config
(
"all"
)
/* Paired-end */
var
fr
:
Boolean
=
config
(
"fr"
,
default
=
false
)
var
rf
:
Boolean
=
config
(
"rf"
,
default
=
false
)
var
ff
:
Boolean
=
config
(
"ff"
,
default
=
false
)
var
noMixed
:
Boolean
=
config
(
"no_mixed"
,
default
=
false
)
var
noDiscordant
:
Boolean
=
config
(
"no_discordant"
,
default
=
false
)
/* Output */
var
time
:
Boolean
=
config
(
"no_overlap"
,
default
=
false
)
var
un
:
Option
[
String
]
=
config
(
"un"
)
var
al
:
Option
[
String
]
=
config
(
"al"
)
var
unConc
:
Option
[
String
]
=
config
(
"un_conc"
)
var
alConc
:
Option
[
String
]
=
config
(
"al_conc"
)
var
unGz
:
Option
[
String
]
=
config
(
"un_gz"
)
var
alGz
:
Option
[
String
]
=
config
(
"al_gz"
)
var
unConcGz
:
Option
[
String
]
=
config
(
"un_conc_gz"
)
var
alConcGz
:
Option
[
String
]
=
config
(
"al_conc_gz"
)
var
unBz2
:
Option
[
String
]
=
config
(
"un_bz2"
)
var
alBz2
:
Option
[
String
]
=
config
(
"al_bz2"
)
var
unConcBz2
:
Option
[
String
]
=
config
(
"un_conc_bz2"
)
var
alConcBz2
:
Option
[
String
]
=
config
(
"al_conc_bz2"
)
var
quiet
:
Boolean
=
config
(
"quiet"
,
default
=
false
)
var
metFile
:
Option
[
String
]
=
config
(
"met_file"
)
var
metStderr
:
Boolean
=
config
(
"met_stderr"
,
default
=
false
)
var
met
:
Option
[
Int
]
=
config
(
"met"
)
var
noHead
:
Boolean
=
config
(
"no_head"
,
default
=
false
)
var
noSq
:
Boolean
=
config
(
"no_sq"
,
default
=
false
)
var
rgId
:
Option
[
String
]
=
config
(
"rg_id"
)
var
rg
:
List
[
String
]
=
config
(
"rg"
,
default
=
Nil
)
var
omitSecSeq
:
Boolean
=
config
(
"omit_sec_seq"
,
default
=
false
)
/* Performance */
var
offrate
:
Option
[
Int
]
=
config
(
"offrate"
)
var
reorder
:
Boolean
=
config
(
"reorder"
,
default
=
false
)
var
mm
:
Boolean
=
config
(
"mm"
,
default
=
false
)
/* Other */
var
qcFilter
:
Boolean
=
config
(
"qc_filter"
,
default
=
false
)
var
seed
:
Option
[
Int
]
=
config
(
"seed"
)
var
nonDeterministic
:
Boolean
=
config
(
"non_deterministic"
,
default
=
false
)
var
removeChrName
:
Boolean
=
config
(
"remove_chrname"
,
default
=
false
)
var
addChrName
:
Boolean
=
config
(
"add_chrname"
,
default
=
false
)
override
def
beforeGraph
()
{
super
.
beforeGraph
()
val
indexDir
=
new
File
(
hisat2Index
).
getParentFile
val
basename
=
hisat2Index
.
stripPrefix
(
indexDir
.
getPath
+
File
.
separator
)
if
(
indexDir
.
exists
())
{
if
(!
indexDir
.
list
()
.
toList
.
filter
(
_
.
startsWith
(
basename
))
.
exists
({
p
=>
p
.
endsWith
(
".bt2"
)
||
p
.
endsWith
(
".bt2l"
)
}))
Logging
.
addError
(
s
"No index files found for hisat2 in: $indexDir with basename: $basename"
)
}
}
/** return commandline to execute */
def
cmdLine
=
required
(
executable
)
+
conditional
(
q
,
"-q"
)
+
conditional
(
qseq
,
"--qseq"
)
+
conditional
(
f
,
"-f"
)
+
conditional
(
r
,
"-r"
)
+
conditional
(
c
,
"-c"
)
+
optional
(
"--skip"
,
skip
)
+
optional
(
"--upto"
,
upto
)
+
optional
(
"--trim3"
,
trim3
)
+
optional
(
"--trim5"
,
trim5
)
+
conditional
(
phred33
,
"--phred33"
)
+
conditional
(
phred64
,
"--phred64"
)
+
conditional
(
intQuals
,
"--int-quals"
)
+
/* Alignment options */
optional
(
"--n-ceil"
,
nCeil
)
+
conditional
(
ignoreQuals
,
"--ignore-quals"
)
+
conditional
(
nofw
,
"--nofw"
)
+
conditional
(
norc
,
"--norc"
)
+
/* Spliced alignment */
optional
(
"--pen-cansplice"
,
penCansplice
)
+
optional
(
"--pen-noncansplice"
,
penNoncansplice
)
+
optional
(
"--pen-canintronlen"
,
penCanintronlen
)
+
optional
(
"--pen-noncanintronlen"
,
penNoncanintronlen
)
+
optional
(
"--min-intronlen"
,
minIntronlen
)
+
optional
(
"--max-intronlen"
,
maxIntronlen
)
+
optional
(
"--known-splicesite-infile"
,
knownSplicesiteInfile
)
+
optional
(
"--novel-splicesite-outfile"
,
novelSplicesiteOutfile
)
+
optional
(
"--novel-splicesite-infile"
,
novelSplicesiteInfile
)
+
conditional
(
noTempSplicesite
,
"--no-temp-splicesite"
)
+
conditional
(
noSplicedAlignment
,
"--no-spliced-alignment"
)
+
optional
(
"--rna-strandness"
,
rnaStrandness
)
+
conditional
(
tmo
,
"--tmo"
)
+
conditional
(
dta
,
"--dta"
)
+
conditional
(
dtaCufflinks
,
"--dta-cufflinks"
)
+
/* Scoring */
optional
(
"--ma"
,
ma
)
+
optional
(
"--mp"
,
mp
)
+
optional
(
"--np"
,
np
)
+
optional
(
"--sp"
,
sp
)
+
optional
(
"--rdg"
,
rdg
)
+
optional
(
"--rfg"
,
rfg
)
+
optional
(
"--score-min"
,
scoreMin
)
+
/* Reporting */
optional
(
"-k"
,
k
)
+
optional
(
"--all"
,
all
)
+
/* Paired End */
conditional
(
fr
,
"--fr"
)
+
conditional
(
rf
,
"--rf"
)
+
conditional
(
ff
,
"--ff"
)
+
conditional
(
noMixed
,
"--no-mixed"
)
+
conditional
(
noDiscordant
,
"--no-discordant"
)
+
/* Output */
conditional
(
time
,
"--time"
)
+
optional
(
"--un"
,
un
)
+
optional
(
"--al"
,
al
)
+
optional
(
"--un-conc"
,
unConc
)
+
optional
(
"--al-conc"
,
alConc
)
+
optional
(
"--un-gz"
,
unGz
)
+
optional
(
"--al-gz"
,
alGz
)
+
optional
(
"--un-conc-gz"
,
unConcGz
)
+
optional
(
"--al-conc-gz"
,
alConcGz
)
+
optional
(
"--un-bz2"
,
unBz2
)
+
optional
(
"--al-bz2"
,
alBz2
)
+
optional
(
"--un-conc-bz2"
,
unConcBz2
)
+
optional
(
"--al-conc-bz2"
,
alConcBz2
)
+
conditional
(
quiet
,
"--quiet"
)
+
optional
(
"--met-file"
,
metFile
)
+
conditional
(
metStderr
,
"--met-stderr"
)
+
optional
(
"--met"
,
met
)
+
conditional
(
noHead
,
"--no-head"
)
+
conditional
(
noSq
,
"--no-sq"
)
+
optional
(
"--rg-id"
,
rgId
)
+
repeat
(
"--rg"
,
rg
)
+
conditional
(
omitSecSeq
,
"--omit-sec-seq"
)
+
/* Performance */
optional
(
"--offrate"
,
offrate
)
+
optional
(
"--threads"
,
threads
)
+
conditional
(
reorder
,
"--reorder"
)
+
conditional
(
mm
,
"--mm"
)
+
/* Other */
conditional
(
qcFilter
,
"--qc-filter"
)
+
optional
(
"--seed"
,
seed
)
+
conditional
(
nonDeterministic
,
"--non-deterministic"
)
+
conditional
(
removeChrName
,
"--remove-chrname"
)
+
conditional
(
addChrName
,
"--add-chrname"
)
+
/* Required */
required
(
"-x"
,
hisat2Index
)
+
(
R2
match
{
case
Some
(
r2
)
=>
required
(
"-1"
,
R1
)
+
optional
(
"-2"
,
r2
)
case
otherwise
=>
required
(
"-U"
,
R1
)
})
+
(
if
(
outputAsStsout
)
""
else
required
(
"-S"
,
output
))
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment