Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
827de641
Commit
827de641
authored
Nov 17, 2014
by
bow
Browse files
Merge branch 'feature-basty_generate_fasta' into 'develop'
Feature basty generate fasta for #31 See merge request !51
parents
109ff2b3
cbcd5dd0
Changes
8
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala
View file @
827de641
...
...
@@ -37,6 +37,7 @@ object BiopetExecutable extends Logging {
nl
.
lumc
.
sasc
.
biopet
.
tools
.
SageCountFastq
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
SageCreateLibrary
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
SageCreateTagCounts
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
BastyGenerateFasta
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
MergeAlleles
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
SamplesTsvToJson
)
)
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala
View file @
827de641
...
...
@@ -9,6 +9,8 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC
javaGCHeapFreeLimit
=
config
(
"java_gc_heap_freelimit"
)
javaGCTimeLimit
=
config
(
"java_gc_timelimit"
)
override
def
javaOpts
=
super
.
javaOpts
+
optional
(
"-Dscala.concurrent.context.numThreads="
,
threads
,
spaceSeparated
=
false
,
escape
=
false
)
override
def
afterGraph
{
memoryLimit
=
config
(
"memory_limit"
)
}
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala
View file @
827de641
...
...
@@ -7,9 +7,9 @@ import java.io.File
class
Raxml
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
{
override
val
defaultThreads
=
4
override
val
defaultThreads
=
1
override
def
versionCommand
=
executable
+
" -v"
override
val
versionRegex
=
""".*version
\w*
.*"""
.
r
override
val
versionRegex
=
""".*version
([\w\.]*)
.*"""
.
r
@Input
(
doc
=
"Input phy/fasta file"
,
required
=
true
)
var
input
:
File
=
_
...
...
@@ -32,8 +32,8 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
@Argument
(
doc
=
"Name of output files"
,
required
=
true
)
var
f
:
String
=
"d"
@Argument
(
doc
=
"Output directory"
,
required
=
fals
e
)
var
w
:
String
=
jobLocalDir
.
getAbsolutePath
@Argument
(
doc
=
"Output directory"
,
required
=
tru
e
)
var
w
:
String
=
_
@Input
(
required
=
false
)
var
t
:
File
=
_
...
...
@@ -44,18 +44,35 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
@Output
(
doc
=
"Output files"
,
required
=
false
)
private
var
out
:
List
[
File
]
=
Nil
executable
=
config
(
"exe"
,
default
=
"raxmlHPC"
)
var
executableNonThreads
:
String
=
config
(
"exe"
,
default
=
"raxmlHPC"
)
var
executableThreads
:
String
=
config
(
"exe_pthreads"
)
override
def
afterGraph
{
if
(
threads
==
0
)
threads
=
getThreads
(
defaultThreads
)
executable
=
if
(
threads
>
1
&&
executableThreads
!=
null
)
executableThreads
else
executableNonThreads
super
.
afterGraph
out
+:=
getInfoFile
f
match
{
case
"d"
if
b
.
isEmpty
=>
out
+:=
getBestTree
case
"d"
if
b
.
isDefined
=>
out
+:=
getBootstrap
case
"d"
if
b
.
isEmpty
=>
{
out
+:=
getBestTreeFile
for
(
t
<-
0
until
N
.
getOrElse
(
1
))
{
out
+:=
new
File
(
w
+
File
.
separator
+
"RAxML_log."
+
n
+
".RUN."
+
t
)
out
+:=
new
File
(
w
+
File
.
separator
+
"RAxML_parsimonyTree."
+
n
+
".RUN."
+
t
)
out
+:=
new
File
(
w
+
File
.
separator
+
"RAxML_result."
+
n
+
".RUN."
+
t
)
}
}
case
"d"
if
b
.
isDefined
=>
out
+:=
getBootstrapFile
case
"b"
=>
{
out
+:=
new
File
(
w
+
File
.
separator
+
"RAxML_bipartitionsBranchLabels."
+
n
)
out
+:=
new
File
(
w
+
File
.
separator
+
"RAxML_bipartitions."
+
n
)
}
case
_
=>
}
}
def
getBestTree
:
File
=
new
File
(
w
+
File
.
separator
+
"RAxML_bestTree."
+
n
)
def
getBootstrap
:
File
=
new
File
(
w
+
File
.
separator
+
"RAxML_bootstrap."
+
n
)
def
getBestTreeFile
:
File
=
new
File
(
w
+
File
.
separator
+
"RAxML_bestTree."
+
n
)
def
getBootstrapFile
:
File
=
new
File
(
w
+
File
.
separator
+
"RAxML_bootstrap."
+
n
)
def
getInfoFile
:
File
=
new
File
(
w
+
File
.
separator
+
"RAxML_info."
+
n
)
def
cmdLine
=
required
(
executable
)
+
required
(
"-m"
,
m
)
+
...
...
@@ -63,9 +80,10 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
optional
(
"-p"
,
p
)
+
optional
(
"-b"
,
b
)
+
optional
(
"-N"
,
N
)
+
optional
(
"-n"
,
n
)
+
optional
(
"-w"
,
w
)
+
optional
(
"-f"
,
f
)
+
optional
(
"-t"
,
t
)
+
optional
(
"-z"
,
z
)
+
(
if
(
threads
>
1
)
required
(
"-T"
,
threads
)
else
""
)
}
\ No newline at end of file
required
(
"-T"
,
threads
)
}
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
View file @
827de641
package
nl.lumc.sasc.biopet.pipelines.basty
import
java.io.File
import
nl.lumc.sasc.biopet.core.MultiSampleQScript
import
nl.lumc.sasc.biopet.core.PipelineCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.Cat
import
nl.lumc.sasc.biopet.extensions.Raxml
import
nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import
nl.lumc.sasc.biopet.tools.BastyGenerateFasta
import
org.broadinstitute.gatk.queue.QScript
class
Basty
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
...
...
@@ -12,15 +16,18 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
class
LibraryOutput
extends
AbstractLibraryOutput
{
}
case
class
FastaOutput
(
variants
:
File
,
consensus
:
File
,
consensusVariants
:
File
)
class
SampleOutput
extends
AbstractSampleOutput
{
var
output
:
FastaOutput
=
_
var
outputSnps
:
FastaOutput
=
_
}
defaults
++=
Map
(
"ploidy"
->
1
,
"use_haplotypecaller"
->
false
,
"use_unifiedgenotyper"
->
true
)
defaults
++=
Map
(
"ploidy"
->
1
,
"use_haplotypecaller"
->
false
,
"use_unifiedgenotyper"
->
true
,
"joint_variantcalling"
->
true
)
var
gatkPipeline
:
GatkPipeline
=
_
var
gatkPipeline
:
GatkPipeline
=
new
GatkPipeline
(
this
)
gatkPipeline
.
jointVariantcalling
=
true
def
init
()
{
gatkPipeline
=
new
GatkPipeline
(
this
)
gatkPipeline
.
outputDir
=
outputDir
gatkPipeline
.
init
}
...
...
@@ -29,17 +36,82 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
gatkPipeline
.
biopetScript
addAll
(
gatkPipeline
.
functions
)
val
refVariants
=
addGenerateFasta
(
null
,
outputDir
+
"reference/"
,
outputName
=
"reference"
)
val
refVariantSnps
=
addGenerateFasta
(
null
,
outputDir
+
"reference/"
,
outputName
=
"reference"
,
snpsOnly
=
true
)
runSamplesJobs
()
val
catVariants
=
Cat
(
this
,
refVariants
.
variants
::
samplesOutput
.
map
(
_
.
_2
.
output
.
variants
).
toList
,
outputDir
+
"fastas/variant.fasta"
)
add
(
catVariants
)
val
catVariantsSnps
=
Cat
(
this
,
refVariantSnps
.
variants
::
samplesOutput
.
map
(
_
.
_2
.
outputSnps
.
variants
).
toList
,
outputDir
+
"fastas/variant.snps_only.fasta"
)
add
(
catVariantsSnps
)
val
catConsensus
=
Cat
(
this
,
refVariants
.
consensus
::
samplesOutput
.
map
(
_
.
_2
.
output
.
consensus
).
toList
,
outputDir
+
"fastas/consensus.fasta"
)
add
(
catConsensus
)
val
catConsensusSnps
=
Cat
(
this
,
refVariantSnps
.
consensus
::
samplesOutput
.
map
(
_
.
_2
.
outputSnps
.
consensus
).
toList
,
outputDir
+
"fastas/consensus.snps_only.fasta"
)
add
(
catConsensusSnps
)
val
catConsensusVariants
=
Cat
(
this
,
refVariants
.
consensusVariants
::
samplesOutput
.
map
(
_
.
_2
.
output
.
consensusVariants
).
toList
,
outputDir
+
"fastas/consensus.variant.fasta"
)
add
(
catConsensusVariants
)
val
catConsensusVariantsSnps
=
Cat
(
this
,
refVariantSnps
.
consensusVariants
::
samplesOutput
.
map
(
_
.
_2
.
outputSnps
.
consensusVariants
).
toList
,
outputDir
+
"fastas/consensus.variant.snps_only.fasta"
)
add
(
catConsensusVariantsSnps
)
val
seed
:
Int
=
config
(
"seed"
,
default
=
12345
)
def
addRaxml
(
input
:
File
,
outputDir
:
String
,
outputName
:
String
)
{
val
raxmlMl
=
new
Raxml
(
this
)
raxmlMl
.
input
=
input
raxmlMl
.
m
=
config
(
"raxml_ml_model"
,
default
=
"GTRGAMMAX"
)
raxmlMl
.
p
=
seed
raxmlMl
.
n
=
outputName
+
"_ml"
raxmlMl
.
w
=
outputDir
raxmlMl
.
N
=
config
(
"ml_runs"
,
default
=
20
,
submodule
=
"raxml"
)
add
(
raxmlMl
)
val
r
=
new
scala
.
util
.
Random
(
seed
)
val
numBoot
=
config
(
"boot_runs"
,
default
=
100
,
submodule
=
"raxml"
).
getInt
val
bootList
=
for
(
t
<-
0
until
numBoot
)
yield
{
val
raxmlBoot
=
new
Raxml
(
this
)
raxmlBoot
.
threads
=
1
raxmlBoot
.
input
=
input
raxmlBoot
.
m
=
config
(
"raxml_ml_model"
,
default
=
"GTRGAMMAX"
)
raxmlBoot
.
p
=
seed
raxmlBoot
.
b
=
math
.
abs
(
r
.
nextInt
)
raxmlBoot
.
w
=
outputDir
raxmlBoot
.
N
=
1
raxmlBoot
.
n
=
outputName
+
"_boot_"
+
t
add
(
raxmlBoot
)
raxmlBoot
.
getBootstrapFile
}
val
cat
=
Cat
(
this
,
bootList
.
toList
,
outputDir
+
"/boot_list"
)
add
(
cat
)
val
raxmlBi
=
new
Raxml
(
this
)
raxmlBi
.
input
=
input
raxmlBi
.
t
=
raxmlMl
.
getBestTreeFile
raxmlBi
.
z
=
cat
.
output
raxmlBi
.
m
=
config
(
"raxml_ml_model"
,
default
=
"GTRGAMMAX"
)
raxmlBi
.
p
=
seed
raxmlBi
.
f
=
"b"
raxmlBi
.
n
=
outputName
+
"_bi"
raxmlBi
.
w
=
outputDir
add
(
raxmlBi
)
}
addRaxml
(
catVariantsSnps
.
output
,
outputDir
+
"raxml"
,
"snps"
)
}
// Called for each sample
def
runSingleSampleJobs
(
sampleConfig
:
Map
[
String
,
Any
])
:
SampleOutput
=
{
val
sampleOutput
=
new
SampleOutput
val
sampleID
:
String
=
sampleConfig
(
"ID"
).
toString
val
sampleDir
=
globalSampleDir
+
sampleID
val
sampleDir
=
globalSampleDir
+
sampleID
+
"/"
sampleOutput
.
libraries
=
runLibraryJobs
(
sampleConfig
)
sampleOutput
.
output
=
addGenerateFasta
(
sampleID
,
sampleDir
)
sampleOutput
.
outputSnps
=
addGenerateFasta
(
sampleID
,
sampleDir
,
snpsOnly
=
true
)
return
sampleOutput
}
...
...
@@ -53,6 +125,23 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
return
libraryOutput
}
def
addGenerateFasta
(
sampleName
:
String
,
outputDir
:
String
,
outputName
:
String
=
null
,
snpsOnly
:
Boolean
=
false
)
:
FastaOutput
=
{
val
bastyGenerateFasta
=
new
BastyGenerateFasta
(
this
)
bastyGenerateFasta
.
outputName
=
if
(
outputName
!=
null
)
outputName
else
sampleName
bastyGenerateFasta
.
inputVcf
=
gatkPipeline
.
multisampleVariantcalling
.
scriptOutput
.
finalVcfFile
if
(
gatkPipeline
.
samplesOutput
.
contains
(
sampleName
))
{
bastyGenerateFasta
.
bamFile
=
gatkPipeline
.
samplesOutput
(
sampleName
).
variantcalling
.
bamFiles
.
head
}
bastyGenerateFasta
.
outputVariants
=
outputDir
+
bastyGenerateFasta
.
outputName
+
".variants"
+
(
if
(
snpsOnly
)
".snps_only"
else
""
)
+
".fasta"
bastyGenerateFasta
.
outputConsensus
=
outputDir
+
bastyGenerateFasta
.
outputName
+
".consensus"
+
(
if
(
snpsOnly
)
".snps_only"
else
""
)
+
".fasta"
bastyGenerateFasta
.
outputConsensusVariants
=
outputDir
+
bastyGenerateFasta
.
outputName
+
".consensus_variants"
+
(
if
(
snpsOnly
)
".snps_only"
else
""
)
+
".fasta"
bastyGenerateFasta
.
sampleName
=
sampleName
bastyGenerateFasta
.
snpsOnly
=
snpsOnly
add
(
bastyGenerateFasta
)
return
FastaOutput
(
bastyGenerateFasta
.
outputVariants
,
bastyGenerateFasta
.
outputConsensus
,
bastyGenerateFasta
.
outputConsensusVariants
)
}
}
object
Basty
extends
PipelineCommand
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
View file @
827de641
...
...
@@ -59,6 +59,11 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
}
val
multisampleVariantcalling
=
new
GatkVariantcalling
(
this
)
{
override
protected
lazy
val
configName
=
"gatkvariantcalling"
override
def
configPath
:
List
[
String
]
=
"multisample"
::
super
.
configPath
}
def
biopetScript
()
{
if
(
onlySample
.
isEmpty
)
{
runSamplesJobs
...
...
@@ -92,23 +97,19 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val
cvRaw
=
CombineVariants
(
this
,
allRawVcfFiles
.
toList
,
outputDir
+
"variantcalling/multisample.raw.vcf.gz"
)
add
(
cvRaw
)
val
gatkVariantcalling
=
new
GatkVariantcalling
(
this
)
{
override
protected
lazy
val
configName
=
"gatkvariantcalling"
override
def
configPath
:
List
[
String
]
=
"multisample"
::
super
.
configPath
}
gatkVariantcalling
.
preProcesBams
=
Some
(
false
)
gatkVariantcalling
.
doublePreProces
=
Some
(
false
)
gatkVariantcalling
.
inputBams
=
allBamfiles
.
toList
gatkVariantcalling
.
rawVcfInput
=
cvRaw
.
out
gatkVariantcalling
.
outputDir
=
outputDir
+
"variantcalling"
gatkVariantcalling
.
outputName
=
"multisample"
gatkVariantcalling
.
init
gatkVariantcalling
.
biopetScript
addAll
(
gatkVariantcalling
.
functions
)
multisampleVariantcalling
.
preProcesBams
=
false
multisampleVariantcalling
.
doublePreProces
=
false
multisampleVariantcalling
.
inputBams
=
allBamfiles
.
toList
multisampleVariantcalling
.
rawVcfInput
=
cvRaw
.
out
multisampleVariantcalling
.
outputDir
=
outputDir
+
"variantcalling"
multisampleVariantcalling
.
outputName
=
"multisample"
multisampleVariantcalling
.
init
multisampleVariantcalling
.
biopetScript
addAll
(
multisampleVariantcalling
.
functions
)
if
(
config
(
"inputtype"
,
default
=
"dna"
).
getString
!=
"rna"
&&
config
(
"recalibration"
,
default
=
false
).
getBoolean
)
{
val
recalibration
=
new
GatkVariantRecalibration
(
this
)
recalibration
.
inputVcf
=
gatk
Variantcalling
.
scriptOutput
.
finalVcfFile
recalibration
.
inputVcf
=
multisample
Variantcalling
.
scriptOutput
.
finalVcfFile
recalibration
.
bamFiles
=
finalBamFiles
recalibration
.
outputDir
=
outputDir
+
"recalibration/"
recalibration
.
init
...
...
@@ -134,10 +135,10 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val
gatkVariantcalling
=
new
GatkVariantcalling
(
this
)
gatkVariantcalling
.
inputBams
=
libraryBamfiles
gatkVariantcalling
.
outputDir
=
sampleDir
+
"/variantcalling/"
gatkVariantcalling
.
preProcesBams
=
Some
(
false
)
gatkVariantcalling
.
preProcesBams
=
false
if
(!
singleSampleCalling
)
{
gatkVariantcalling
.
useHaplotypecaller
=
Some
(
false
)
gatkVariantcalling
.
useUnifiedGenotyper
=
Some
(
false
)
gatkVariantcalling
.
useHaplotypecaller
=
false
gatkVariantcalling
.
useUnifiedGenotyper
=
false
}
gatkVariantcalling
.
sampleID
=
sampleID
gatkVariantcalling
.
init
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
View file @
827de641
...
...
@@ -8,7 +8,7 @@ import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator
import
nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
,
Argument
}
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
}
import
scala.collection.SortedMap
import
scala.language.reflectiveCalls
...
...
@@ -41,25 +41,32 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
var
useHaplotypecaller
:
Option
[
Boolean
]
=
config
(
"use_haplotypecaller"
,
default
=
true
)
var
useUnifiedGenotyper
:
Option
[
Boolean
]
=
config
(
"use_unifiedgenotyper"
,
default
=
false
)
var
useAllelesOption
:
Option
[
Boolean
]
=
config
(
"use_alleles_option"
,
default
=
false
)
var
useIndelRealigner
:
Boolean
=
config
(
"use_indel_realign"
,
default
=
true
)
var
useBaseRecalibration
:
Boolean
=
config
(
"use_base_recalibration"
,
default
=
true
)
def
init
()
{
if
(
outputName
==
null
&&
sampleID
!=
null
)
outputName
=
sampleID
else
if
(
outputName
==
null
)
outputName
=
"noname"
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on gatk module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
val
baseRecalibrator
=
new
BaseRecalibrator
(
this
)
if
(
preProcesBams
&&
useBaseRecalibration
&&
baseRecalibrator
.
knownSites
.
isEmpty
)
{
logger
.
warn
(
"No Known site found, skipping base recalibration"
)
useBaseRecalibration
=
false
}
}
private
def
doublePreProces
(
files
:
List
[
File
])
:
List
[
File
]
=
{
if
(
files
.
size
==
1
)
return
files
if
(
files
.
isEmpty
)
throw
new
IllegalStateException
(
"Files can't be empty"
)
if
(!
doublePreProces
.
get
)
return
files
val
markDu
b
=
MarkDuplicates
(
this
,
files
,
new
File
(
outputDir
+
outputName
+
".dedup.bam"
))
if
(
dbsnp
!=
null
)
{
add
(
markDub
,
isIntermediate
=
true
)
List
(
addIndelRealign
(
markDu
b
.
output
,
outputDir
,
isIntermediate
=
false
))
val
markDu
p
=
MarkDuplicates
(
this
,
files
,
new
File
(
outputDir
+
outputName
+
".dedup.bam"
))
add
(
markDup
,
isIntermediate
=
useIndelRealigner
)
if
(
useIndelRealigner
)
{
List
(
addIndelRealign
(
markDu
p
.
output
,
outputDir
,
isIntermediate
=
false
))
}
else
{
add
(
markDub
,
isIntermediate
=
true
)
List
(
markDub
.
output
)
List
(
markDup
.
output
)
}
}
...
...
@@ -67,8 +74,14 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
scriptOutput
.
bamFiles
=
if
(
preProcesBams
.
get
)
{
var
bamFiles
:
List
[
File
]
=
Nil
for
(
inputBam
<-
inputBams
)
{
var
bamFile
=
addIndelRealign
(
inputBam
,
outputDir
)
bamFiles
:+=
addBaseRecalibrator
(
bamFile
,
outputDir
,
isIntermediate
=
bamFiles
.
size
>
1
)
var
bamFile
=
inputBam
if
(
useIndelRealigner
)
{
bamFile
=
addIndelRealign
(
bamFile
,
outputDir
,
isIntermediate
=
useBaseRecalibration
)
}
if
(
useBaseRecalibration
)
{
bamFile
=
addBaseRecalibrator
(
bamFile
,
outputDir
,
isIntermediate
=
bamFiles
.
size
>
1
)
}
bamFiles
:+=
bamFile
}
doublePreProces
(
bamFiles
)
}
else
if
(
inputBams
.
size
>
1
&&
doublePreProces
.
get
)
{
...
...
@@ -188,7 +201,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
}
def
addBaseRecalibrator
(
inputBam
:
File
,
dir
:
String
,
isIntermediate
:
Boolean
=
false
)
:
File
=
{
val
baseRecalibrator
=
BaseRecalibrator
(
this
,
inputBam
,
swapExt
(
dir
,
inputBam
,
".bam"
,
".baserecal"
))
//with gatkArguments {
val
baseRecalibrator
=
BaseRecalibrator
(
this
,
inputBam
,
swapExt
(
dir
,
inputBam
,
".bam"
,
".baserecal"
))
if
(
baseRecalibrator
.
knownSites
.
isEmpty
)
{
logger
.
warn
(
"No Known site found, skipping base recalibration, file: "
+
inputBam
)
...
...
@@ -196,7 +209,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
}
add
(
baseRecalibrator
)
val
baseRecalibratorAfter
=
BaseRecalibrator
(
this
,
inputBam
,
swapExt
(
dir
,
inputBam
,
".bam"
,
".baserecal.after"
))
//with gatkArguments {
val
baseRecalibratorAfter
=
BaseRecalibrator
(
this
,
inputBam
,
swapExt
(
dir
,
inputBam
,
".bam"
,
".baserecal.after"
))
baseRecalibratorAfter
.
BQSR
=
baseRecalibrator
.
o
add
(
baseRecalibratorAfter
)
...
...
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala
0 → 100644
View file @
827de641
package
nl.lumc.sasc.biopet.tools
import
htsjdk.samtools.SamReaderFactory
import
htsjdk.samtools.reference.IndexedFastaSequenceFile
import
htsjdk.variant.variantcontext.VariantContext
import
htsjdk.variant.vcf.VCFFileReader
import
java.io.File
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
nl.lumc.sasc.biopet.core.ToolCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
scala.collection.JavaConversions._
import
nl.lumc.sasc.biopet.utils.VcfUtils._
import
scala.collection.mutable.ListBuffer
class
BastyGenerateFasta
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input vcf file"
,
required
=
false
)
var
inputVcf
:
File
=
_
@Input
(
doc
=
"Bam File"
,
required
=
false
)
var
bamFile
:
File
=
_
@Input
(
doc
=
"reference"
,
required
=
false
)
var
reference
:
File
=
config
(
"reference"
)
@Output
(
doc
=
"Output fasta, variants only"
,
required
=
false
)
var
outputVariants
:
File
=
_
@Output
(
doc
=
"Output fasta, variants only"
,
required
=
false
)
var
outputConsensus
:
File
=
_
@Output
(
doc
=
"Output fasta, variants only"
,
required
=
false
)
var
outputConsensusVariants
:
File
=
_
var
snpsOnly
:
Boolean
=
config
(
"snps_only"
,
default
=
false
)
var
sampleName
:
String
=
_
var
minAD
:
Int
=
config
(
"min_ad"
,
default
=
8
)
var
minDepth
:
Int
=
config
(
"min_depth"
,
default
=
8
)
var
outputName
:
String
=
_
override
val
defaultVmem
=
"8G"
memoryLimit
=
Option
(
4.0
)
override
def
commandLine
=
super
.
commandLine
+
optional
(
"--inputVcf"
,
inputVcf
)
+
optional
(
"--bamFile"
,
bamFile
)
+
optional
(
"--outputVariants"
,
outputVariants
)
+
optional
(
"--outputConsensus"
,
outputConsensus
)
+
optional
(
"--outputConsensusVariants"
,
outputConsensusVariants
)
+
conditional
(
snpsOnly
,
"--snpsOnly"
)
+
optional
(
"--sampleName"
,
sampleName
)
+
required
(
"--outputName"
,
outputName
)
+
optional
(
"--minAD"
,
minAD
)
+
optional
(
"--minDepth"
,
minDepth
)
+
optional
(
"--reference"
,
reference
)
}
object
BastyGenerateFasta
extends
ToolCommand
{
case
class
Args
(
inputVcf
:
File
=
null
,
outputVariants
:
File
=
null
,
outputConsensus
:
File
=
null
,
outputConsensusVariants
:
File
=
null
,
bamFile
:
File
=
null
,
snpsOnly
:
Boolean
=
false
,
sampleName
:
String
=
null
,
outputName
:
String
=
null
,
minAD
:
Int
=
8
,
minDepth
:
Int
=
8
,
reference
:
File
=
null
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'V'
,
"inputVcf"
)
unbounded
()
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
inputVcf
=
x
)
}
text
(
"vcf file, needed for outputVariants and outputConsensusVariants"
)
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"File does not exist: "
+
x
)
}
opt
[
File
](
"bamFile"
)
unbounded
()
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
bamFile
=
x
)
}
text
(
"bam file, needed for outputConsensus and outputConsensusVariants"
)
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"File does not exist: "
+
x
)
}
opt
[
File
](
"outputVariants"
)
maxOccurs
(
1
)
unbounded
()
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
outputVariants
=
x
)
}
text
(
"fasta with only variants from vcf file"
)
opt
[
File
](
"outputConsensus"
)
maxOccurs
(
1
)
unbounded
()
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
outputConsensus
=
x
)
}
text
(
"Consensus fasta from bam, always reference bases else 'N'"
)
opt
[
File
](
"outputConsensusVariants"
)
maxOccurs
(
1
)
unbounded
()
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
outputConsensusVariants
=
x
)
}
text
(
"Consensus fasta from bam with variants from vcf file, always reference bases else 'N'"
)
opt
[
Unit
](
"snpsOnly"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
snpsOnly
=
true
)
}
text
(
"Only use snps from vcf file"
)
opt
[
String
](
"sampleName"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
sampleName
=
x
)
}
text
(
"Sample name in vcf file"
)
opt
[
String
](
"outputName"
)
required
()
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
outputName
=
x
)
}
text
(
"Output name in fasta file header"
)
opt
[
Int
](
"minAD"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
minAD
=
x
)
}
text
(
"min AD value in vcf file for sample"
)
opt
[
Int
](
"minDepth"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
minDepth
=
x
)
}
text
(
"min detp in bam file"
)
opt
[
File
](
"reference"
)
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
reference
=
x
)
}
text
(
"Indexed reference fasta file"
)
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"File does not exist: "
+
x
)
}
checkConfig
{
c
=>
{
val
err
:
ListBuffer
[
String
]
=
ListBuffer
()
if
(
c
.
outputConsensus
!=
null
||
c
.
outputConsensusVariants
!=
null
)
{
if
(
c
.
reference
==
null
)
err
.
add
(
"No reference suplied"
)
else
{
val
index
=
new
File
(
c
.
reference
.
getAbsolutePath
+
".fai"
)
if
(!
index
.
exists
)
err
.
add
(
"Reference does not have index"
)
}
if
(
c
.
outputConsensusVariants
!=
null
&&
c
.
inputVcf
==
null
)
err
.
add
(
"To write outputVariants input vcf is required, please use --inputVcf option"
)
if
(
c
.
sampleName
!=
null
&&
c
.
bamFile
==
null
)
err
.
add
(
"To write Consensus input bam file is required, please use --bamFile option"
)
}
if
(
c
.
outputVariants
!=
null
&&
c
.
inputVcf
==
null
)
err
.
add
(
"To write outputVariants input vcf is required, please use --inputVcf option"
)
if
(
c
.
outputVariants
==
null
&&
c
.
outputConsensus
==
null
&&
c
.
outputConsensusVariants
==
null
)
err
.
add
(
"No output file selected"
)
if
(
err
.
isEmpty
)
success
else
failure
(
err
.
mkString
(
""
,
"\nError: "
,
"\n"
))
}
}
}
protected
var
cmdArgs
:
Args
=
_
private
val
chunkSize
=
100000
/**
* @param args the command line arguments
*/
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
val
argsParser
=
new
OptParser
cmdArgs
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
sys
.
exit
(
1
)
if
(
cmdArgs
.
outputVariants
!=
null
)
writeVariantsOnly
()
if
(
cmdArgs
.
outputConsensus
!=
null
||
cmdArgs
.
outputConsensusVariants
!=
null
)
writeConsensus
()
}
protected
def
writeConsensus
()
{
val
referenceFile
=
new
IndexedFastaSequenceFile
(
cmdArgs
.
reference
)
val
referenceDict
=
referenceFile
.
getSequenceDictionary
for
(
chr
<-
referenceDict
.
getSequences
)
{
val
chunks
=
(
for
(
chunk
<-
(
0
to
(
chr
.
getSequenceLength
/
chunkSize
)).
par
)
yield
{
val
chrName
=
chr
.
getSequenceName
val
begin
=
chunk
*
chunkSize
+
1
val
end
=
{
val
e
=
(
chunk
+
1
)
*
chunkSize
if
(
e
>
chr
.
getSequenceLength
)
chr
.
getSequenceLength
else
e
}
logger
.
info
(
"begin on: chrName: "
+
chrName
+
" begin: "
+
begin
+
" end: "
+
end
)
val
referenceSequence
=
referenceFile
.
getSubsequenceAt
(
chrName
,
begin
,
end
)
val
variants
:
Map
[(
Int
,
Int
)
,
VariantContext
]
=
if
(
cmdArgs
.
inputVcf
!=
null
)
{
val
reader
=
new
VCFFileReader
(
cmdArgs
.
inputVcf
,
true
)
(
for
(
variant
<-
reader
.
query
(
chrName
,
begin
,
end
)
if
(!
cmdArgs
.
snpsOnly
||
variant
.
isSNP
))
yield
{
(
variant
.