Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
d980652c
Commit
d980652c
authored
Oct 14, 2014
by
wyleung
Browse files
Parts for the SV calling pipeline, formatting and adding breakdancer as subpipeline
parent
60c0ac17
Changes
4
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Stampy.scala
View file @
d980652c
...
...
@@ -24,25 +24,24 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
@Output
(
doc
=
"Output file SAM"
,
shortName
=
"output"
)
var
output
:
File
=
_
// options set via API or config
// var numrecords: String = config("numrecords", default = "all")
// var numrecords: String = config("numrecords", default = "all")
var
solexa
:
Boolean
=
config
(
"solexa"
,
default
=
false
)
var
solexaold
:
Boolean
=
config
(
"solexaold"
,
default
=
false
)
var
sanger
:
Boolean
=
config
(
"sanger"
,
default
=
false
)
var
insertsize
:
Option
[
Int
]
=
config
(
"insertsize"
,
default
=
250
)
var
insertsd
:
Option
[
Int
]
=
config
(
"insertsd"
,
default
=
60
)
var
insertsize2
:
Option
[
Int
]
=
config
(
"insertsize2"
,
default
=
-
2000
)
var
insertsd2
:
Option
[
Int
]
=
config
(
"insertsd2"
,
default
=
-
1
)
var
sensitive
:
Boolean
=
config
(
"sensitive"
,
default
=
false
)
var
fast
:
Boolean
=
config
(
"fast"
,
default
=
false
)
var
readgroup
:
String
=
config
(
"readgroup"
)
var
verbosity
:
Option
[
Int
]
=
config
(
"verbosity"
,
default
=
2
)
var
logfile
:
String
=
config
(
"logfile"
)
executable
=
config
(
"exe"
,
default
=
"stampy.py"
,
freeVar
=
false
)
override
val
versionRegex
=
"""stampy v(.*) \(.*\), .*"""
.
r
override
val
versionExitcode
=
List
(
0
,
1
)
...
...
@@ -52,33 +51,33 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
override
val
defaultThreads
=
8
override
def
versionCommand
=
executable
+
" --help"
def
cmdLine
:
String
=
{
def
cmdLine
:
String
=
{
var
cmd
:
String
=
required
(
executable
)
+
optional
(
"-t"
,
nCoresRequest
)
+
conditional
(
solexa
,
"--solexa"
)
+
conditional
(
solexaold
,
"--solexaold"
)
+
conditional
(
sanger
,
"--sanger"
)
+
optional
(
"--insertsize"
,
insertsize
)
+
optional
(
"--insertsd"
,
insertsd
)
// Optionally start Mate Pair alignment, if set, the aligner will
// assign MP reads as MP, otherwise in PE mode, these reads will
optional
(
"-t"
,
nCoresRequest
)
+
conditional
(
solexa
,
"--solexa"
)
+
conditional
(
solexaold
,
"--solexaold"
)
+
conditional
(
sanger
,
"--sanger"
)
+
optional
(
"--insertsize"
,
insertsize
)
+
optional
(
"--insertsd"
,
insertsd
)
// Optionally start Mate Pair alignment, if set, the aligner will
// assign MP reads as MP, otherwise in PE mode, these reads will
// be aligned with the bits RR or FF showing a False Inversion event
if
(
insertsd2
.
getOrElse
(-
1
)
!=
-
1
)
{
if
(
insertsd2
.
getOrElse
(-
1
)
!=
-
1
)
{
cmd
+=
optional
(
"--insertsize2"
,
insertsize2
)
+
optional
(
"--insertsd2"
,
insertsd2
)
optional
(
"--insertsd2"
,
insertsd2
)
}
cmd
+=
conditional
(
sensitive
,
"--sensitive"
)
+
conditional
(
fast
,
"--fast"
)
+
optional
(
"--readgroup"
,
readgroup
)
+
optional
(
"-v"
,
verbosity
)
+
optional
(
"--logfile"
,
logfile
)
+
" -g "
+
required
(
genome
)
+
" -h "
+
required
(
hash
)
+
" -o "
+
required
(
output
)
+
" -M "
+
required
(
R1
)
+
optional
(
R2
)
conditional
(
fast
,
"--fast"
)
+
optional
(
"--readgroup"
,
readgroup
)
+
optional
(
"-v"
,
verbosity
)
+
optional
(
"--logfile"
,
logfile
)
+
" -g "
+
required
(
genome
)
+
" -h "
+
required
(
hash
)
+
" -o "
+
required
(
output
)
+
" -M "
+
required
(
R1
)
+
optional
(
R2
)
return
cmd
}
}
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/Breakdancer.scala
View file @
d980652c
...
...
@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.extensions.svcallers
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
org.broadinstitute.gatk.queue.QScript
import
nl.lumc.sasc.biopet.core.BiopetQScript
import
nl.lumc.sasc.biopet.core.PipelineCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
,
Argument
}
import
java.io.File
...
...
@@ -63,7 +64,10 @@ object BreakdancerConfig {
/*
* The caller
*
**/
class
BreakdancerCaller
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
{
...
...
@@ -77,7 +81,7 @@ class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunctio
override
def
versionCommand
=
executable
@Input
(
doc
=
"
Input file (bam)
"
)
@Input
(
doc
=
"
The breakdancer configuration file
"
)
var
input
:
File
=
_
@Argument
(
doc
=
"Work directory"
)
...
...
@@ -86,12 +90,39 @@ class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunctio
@Output
(
doc
=
"Breakdancer VCF output"
)
var
output
:
File
=
_
// var T: Option[Int] = config("T", default = defaultThreads)
var
f
:
Boolean
=
config
(
"f"
,
default
=
true
)
// delete work directory before running
// var w: String = config("w", default = workdir + "/work")
var
a
:
Boolean
=
config
(
"a"
,
default
=
false
)
// don't recompute AS tags
var
k
:
Boolean
=
config
(
"k"
,
default
=
false
)
// keep working directory
var
r
:
Boolean
=
config
(
"r"
,
default
=
false
)
// take read groups into account
/*
Options:
-o STRING operate on a single chromosome [all chromosome]
-s INT minimum length of a region [7]
-c INT cutoff in unit of standard deviation [3]
-m INT maximum SV size [1000000000]
-q INT minimum alternative mapping quality [35]
-r INT minimum number of read pairs required to establish a connection [2]
-x INT maximum threshold of haploid sequence coverage for regions to be ignored [1000]
-b INT buffer size for building connection [100]
-t only detect transchromosomal rearrangement, by default off
-d STRING prefix of fastq files that SV supporting reads will be saved by library
-g STRING dump SVs and supporting reads in BED format for GBrowse
-l analyze Illumina long insert (mate-pair) library
-a print out copy number and support reads per library rather than per bam, by default off
-h print out Allele Frequency column, by default off
-y INT output score filter [30]
*/
var
s
:
Option
[
Int
]
=
config
(
"s"
,
default
=
7
)
var
c
:
Option
[
Int
]
=
config
(
"c"
,
default
=
3
)
var
m
:
Option
[
Int
]
=
config
(
"m"
,
default
=
1000000000
)
var
q
:
Option
[
Int
]
=
config
(
"qs"
,
default
=
35
)
var
r
:
Option
[
Int
]
=
config
(
"r"
,
default
=
2
)
var
x
:
Option
[
Int
]
=
config
(
"x"
,
default
=
1000
)
var
b
:
Option
[
Int
]
=
config
(
"b"
,
default
=
100
)
var
t
:
Boolean
=
config
(
"t"
,
default
=
false
)
var
d
:
String
=
config
(
"d"
)
var
g
:
String
=
config
(
"g"
)
var
l
:
Boolean
=
config
(
"l"
,
default
=
false
)
var
a
:
Boolean
=
config
(
"a"
,
default
=
false
)
var
h
:
Boolean
=
config
(
"h"
,
default
=
false
)
var
y
:
Option
[
Int
]
=
config
(
"y"
,
default
=
30
)
override
def
beforeCmd
{
if
(
workdir
==
null
)
throw
new
Exception
(
"Breakdancer :: Workdirectory is not defined"
)
...
...
@@ -99,7 +130,23 @@ class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunctio
}
def
cmdLine
=
required
(
executable
)
+
required
(
input
)
+
">"
+
required
(
output
)
optional
(
"-s"
,
s
)
+
optional
(
"-c"
,
c
)
+
optional
(
"-m"
,
m
)
+
optional
(
"-q"
,
q
)
+
optional
(
"-r"
,
r
)
+
optional
(
"-x"
,
x
)
+
optional
(
"-b"
,
b
)
+
conditional
(
t
,
"-t"
)
+
optional
(
"-d"
,
d
)
+
optional
(
"-g"
,
g
)
+
conditional
(
l
,
"-l"
)
+
conditional
(
a
,
"-a"
)
+
conditional
(
h
,
"-h"
)
+
optional
(
"-y"
,
y
)
+
required
(
input
)
+
">"
+
required
(
output
)
}
object
BreakdancerCaller
{
...
...
@@ -124,40 +171,49 @@ class Breakdancer(val root: Configurable) extends QScript with BiopetQScript {
@Argument
(
doc
=
"Work directory"
)
var
workdir
:
String
=
_
@Output
(
doc
=
"Breakdancer VCF output"
)
var
output
:
File
=
_
// @Output(doc = "Breakdancer VCF output")
// lazy val outputvcf: File = {
// new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.vcf")
// }
@Output
(
doc
=
"Breakdancer raw output"
)
lazy
val
outputraw
:
File
=
{
new
File
(
workdir
+
"/"
+
input
.
getName
.
substring
(
0
,
input
.
getName
.
lastIndexOf
(
".bam"
))
+
".breakdancer.tsv"
)
}
override
def
init
()
{
}
def
biopetScript
()
{
// write the pipeline here
// start with QC, alignment, call sambamba, call sv callers, reporting
// read config and set all parameters for the pipeline
logger
.
info
(
"Starting Breakdancer"
)
logger
.
info
(
"Starting Breakdancer
configuration
"
)
val
bdcfg
=
BreakdancerConfig
(
this
,
input
,
workdir
)
outputFiles
+=
(
"breakdancer_cfg"
->
bdcfg
.
output
)
add
(
bdcfg
)
val
output_vcf
:
File
=
new
File
(
workdir
+
"/"
+
input
.
getName
.
substring
(
0
,
input
.
getName
.
lastIndexOf
(
".bam"
))
+
".breakdancer.tsv"
)
val
breakdancer
=
BreakdancerCaller
(
this
,
input
,
output_vcf
)
val
output_tsv
:
File
=
this
.
outputraw
val
breakdancer
=
BreakdancerCaller
(
this
,
bdcfg
.
output
,
output_tsv
)
add
(
breakdancer
)
outputFiles
+=
(
"breakdancer_tsv"
->
breakdancer
.
output
)
// val output_vcf: File = this.outputvcf
// convert this tsv to vcf using the python script
}
private
def
swapExtension
(
inputFile
:
String
)
=
inputFile
.
substring
(
0
,
inputFile
.
lastIndexOf
(
".bam"
))
+
".breakdancer.tsv"
//
private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".breakdancer.tsv"
}
object
Breakdancer
{
def
apply
(
root
:
Configurable
,
input
:
File
,
reference
:
File
,
runDir
:
String
)
:
Breakdancer
=
{
object
Breakdancer
extends
PipelineCommand
{
def
apply
(
root
:
Configurable
,
input
:
File
,
reference
:
File
,
runDir
:
String
)
:
Breakdancer
=
{
val
breakdancer
=
new
Breakdancer
(
root
)
breakdancer
.
input
=
input
breakdancer
.
reference
=
reference
breakdancer
.
workdir
=
runDir
breakdancer
.
init
breakdancer
.
biopetScript
return
breakdancer
}
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/Clever.scala
View file @
d980652c
...
...
@@ -29,10 +29,12 @@ class Clever(val root: Configurable) extends BiopetCommandLineFunction {
@Argument
(
doc
=
"Work directory"
)
var
workdir
:
String
=
_
var
cwd
:
String
=
_
@Output
(
doc
=
"Clever VCF output"
)
lazy
val
outputvcf
:
File
=
{
new
File
(
workdir
+
"predictions.vcf"
)
new
File
(
cwd
+
"predictions.vcf"
)
}
@Output
(
doc
=
"Clever raw output"
)
...
...
@@ -66,10 +68,11 @@ class Clever(val root: Configurable) extends BiopetCommandLineFunction {
}
object
Clever
{
def
apply
(
root
:
Configurable
,
input
:
File
,
reference
:
File
,
runDir
:
String
)
:
Clever
=
{
def
apply
(
root
:
Configurable
,
input
:
File
,
reference
:
File
,
svDir
:
String
,
runDir
:
String
)
:
Clever
=
{
val
clever
=
new
Clever
(
root
)
clever
.
input
=
input
clever
.
reference
=
reference
clever
.
cwd
=
svDir
clever
.
workdir
=
runDir
return
clever
}
...
...
@@ -89,6 +92,10 @@ class CleverPipeline(val root: Configurable) extends QScript with BiopetQScript
@Argument
(
doc
=
"Work directory"
)
var
workdir
:
String
=
_
@Argument
(
doc
=
"Current working directory"
)
var
cwd
:
String
=
_
override
def
init
()
{
}
...
...
@@ -97,7 +104,7 @@ class CleverPipeline(val root: Configurable) extends QScript with BiopetQScript
logger
.
info
(
"Starting Clever Pipeline"
)
/// start clever and then copy the vcf into the root directory "<sample>.clever/"
val
clever
=
Clever
(
this
,
input
,
reference
,
workdir
)
val
clever
=
Clever
(
this
,
input
,
reference
,
cwd
,
workdir
)
outputFiles
+=
(
"clever_vcf"
->
clever
.
outputvcf
)
add
(
clever
)
}
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala
View file @
d980652c
...
...
@@ -18,15 +18,12 @@ import org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.function._
import
org.broadinstitute.gatk.queue.engine.JobRunInfo
class
Yamsvp
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
def
this
()
=
this
(
null
)
var
reference
:
File
=
_
var
finalBamFiles
:
List
[
File
]
=
Nil
class
LibraryOutput
extends
AbstractLibraryOutput
{
var
mappedBamFile
:
File
=
_
}
...
...
@@ -36,7 +33,6 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
var
mappedBamFile
:
File
=
_
}
override
def
init
()
{
for
(
file
<-
configfiles
)
globalConfig
.
loadConfigFile
(
file
)
reference
=
config
(
"reference"
,
required
=
true
)
...
...
@@ -53,14 +49,14 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
// read config and set all parameters for the pipeline
logger
.
info
(
"Starting YAM SV Pipeline"
)
runSamplesJobs
//
//
}
override
def
onExecutionDone
(
jobs
:
Map
[
QFunction
,
JobRunInfo
],
success
:
Boolean
)
{
logger
.
info
(
"YAM SV Pipeline has run ..............................................................."
)
}
def
runSingleSampleJobs
(
sampleConfig
:
Map
[
String
,
Any
])
:
SampleOutput
=
{
val
sampleOutput
=
new
SampleOutput
var
libraryBamfiles
:
List
[
File
]
=
List
()
...
...
@@ -88,20 +84,20 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
// val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
val
cleverDir
=
svcallingDir
+
sampleID
+
".clever/"
val
clever
=
Clever
(
this
,
bamFile
,
this
.
reference
,
cleverDir
)
val
clever
=
Clever
(
this
,
bamFile
,
this
.
reference
,
svcallingDir
,
cleverDir
)
sampleOutput
.
vcf
+=
(
"clever"
->
List
(
clever
.
outputvcf
))
add
(
clever
)
//
val breakdancerDir = sampleDir + sampleID + ".breakdancer/"
//
val breakdancer = Breakdancer(this, bamFile, this.reference, breakdancerDir
)
// outputFiles
+= ("breakdancer
_vcf
" -> List(breakdancer.output)
)
//
addAll(breakdancer.functions)
val
breakdancerDir
=
sampleDir
+
sampleID
+
".breakdancer/"
val
breakdancer
=
Breakdancer
(
this
,
bamFile
,
this
.
reference
,
breakdancerDir
)
sampleOutput
.
vcf
+=
(
"breakdancer"
->
List
(
breakdancer
.
output
raw
))
addAll
(
breakdancer
.
functions
)
return
sampleOutput
}
// Called for each run from a sample
def
runSingleLibraryJobs
(
runConfig
:
Map
[
String
,
Any
],
sampleConfig
:
Map
[
String
,
Any
])
:
LibraryOutput
=
{
val
libraryOutput
=
new
LibraryOutput
var
outputFiles
:
Map
[
String
,
File
]
=
Map
()
...
...
@@ -111,13 +107,13 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
val
runDir
:
String
=
alignmentDir
+
"run_"
+
runID
+
"/"
if
(
runConfig
.
contains
(
"R1"
))
{
// val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
// val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
val
mapping
=
new
Mapping
(
this
)
mapping
.
defaultAligner
=
"stampy"
mapping
.
skipFlexiprep
=
false
mapping
.
skipMarkduplicates
=
true
// we do the dedup marking using Sambamba
if
(
runConfig
.
contains
(
"R1"
))
mapping
.
input_R1
=
new
File
(
runConfig
(
"R1"
).
toString
)
if
(
runConfig
.
contains
(
"R2"
))
mapping
.
input_R2
=
new
File
(
runConfig
(
"R2"
).
toString
)
mapping
.
paired
=
(
mapping
.
input_R2
!=
null
)
...
...
@@ -133,16 +129,14 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
addAll
(
mapping
.
functions
)
// start sambamba dedup
// outputFiles += ("final_bam" -> mapping.outputFiles("finalBamFile"))
// outputFiles += ("final_bam" -> mapping.outputFiles("finalBamFile"))
libraryOutput
.
mappedBamFile
=
mapping
.
outputFiles
(
"finalBamFile"
)
}
else
this
.
logger
.
error
(
"Sample: "
+
sampleID
+
": No R1 found for run: "
+
runConfig
)
return
libraryOutput
// logger.debug(outputFiles)
// return outputFiles
// logger.debug(outputFiles)
// return outputFiles
}
}
object
Yamsvp
extends
PipelineCommand
{
override
val
pipeline
=
"/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.class"
}
object
Yamsvp
extends
PipelineCommand
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment