Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
91e72434
Commit
91e72434
authored
Jan 27, 2015
by
Peter van 't Hof
Browse files
Switch Gatk pipeline and Basty to new multisample classes
parent
49ba8c53
Changes
7
Hide whitespace changes
Inline
Side-by-side
protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
View file @
91e72434
...
...
@@ -12,24 +12,40 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.
{
RunGubbins
,
Cat
,
Raxml
}
import
nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import
nl.lumc.sasc.biopet.tools.BastyGenerateFasta
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
org.broadinstitute.gatk.queue.QScript
class
Basty
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
qscript
=>
def
this
()
=
this
(
null
)
class
LibraryOutput
extends
AbstractLibraryOutput
{
}
case
class
FastaOutput
(
variants
:
File
,
consensus
:
File
,
consensusVariants
:
File
)
class
SampleOutput
extends
AbstractSampleOutput
{
override
def
defaults
=
ConfigUtils
.
mergeMaps
(
Map
(
"ploidy"
->
1
,
"use_haplotypecaller"
->
false
,
"use_unifiedgenotyper"
->
true
,
"joint_variantcalling"
->
true
),
super
.
defaults
)
var
gatkPipeline
:
GatkPipeline
=
new
GatkPipeline
(
qscript
)
def
makeSample
(
id
:
String
)
=
new
Sample
(
id
)
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
def
makeLibrary
(
id
:
String
)
=
new
Library
(
id
)
class
Library
(
libraryId
:
String
)
extends
AbstractLibrary
(
libraryId
)
{
def
addJobs
()
:
Unit
=
{}
}
var
output
:
FastaOutput
=
_
var
outputSnps
:
FastaOutput
=
_
}
defaults
++=
Map
(
"ploidy"
->
1
,
"use_haplotypecaller"
->
false
,
"use_unifiedgenotyper"
->
true
,
"joint_variantcalling"
->
true
)
var
gatkPipeline
:
GatkPipeline
=
new
GatkPipeline
(
this
)
gatkPipeline
.
jointVariantcalling
=
true
def
addJobs
()
:
Unit
=
{
runLibraryJobs
()
output
=
addGenerateFasta
(
sampleId
,
sampleDir
)
outputSnps
=
addGenerateFasta
(
sampleId
,
sampleDir
,
snpsOnly
=
true
)
}
}
def
init
()
{
gatkPipeline
.
outputDir
=
outputDir
...
...
@@ -45,19 +61,19 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
runSamplesJobs
()
val
catVariants
=
Cat
(
this
,
refVariants
.
variants
::
samples
Output
.
map
(
_
.
_2
.
output
.
variants
).
toList
,
outputDir
+
"fastas/variant.fasta"
)
val
catVariants
=
Cat
(
this
,
refVariants
.
variants
::
samples
.
map
(
_
.
_2
.
output
.
variants
).
toList
,
outputDir
+
"fastas/variant.fasta"
)
add
(
catVariants
)
val
catVariantsSnps
=
Cat
(
this
,
refVariantSnps
.
variants
::
samples
Output
.
map
(
_
.
_2
.
outputSnps
.
variants
).
toList
,
outputDir
+
"fastas/variant.snps_only.fasta"
)
val
catVariantsSnps
=
Cat
(
this
,
refVariantSnps
.
variants
::
samples
.
map
(
_
.
_2
.
outputSnps
.
variants
).
toList
,
outputDir
+
"fastas/variant.snps_only.fasta"
)
add
(
catVariantsSnps
)
val
catConsensus
=
Cat
(
this
,
refVariants
.
consensus
::
samples
Output
.
map
(
_
.
_2
.
output
.
consensus
).
toList
,
outputDir
+
"fastas/consensus.fasta"
)
val
catConsensus
=
Cat
(
this
,
refVariants
.
consensus
::
samples
.
map
(
_
.
_2
.
output
.
consensus
).
toList
,
outputDir
+
"fastas/consensus.fasta"
)
add
(
catConsensus
)
val
catConsensusSnps
=
Cat
(
this
,
refVariantSnps
.
consensus
::
samples
Output
.
map
(
_
.
_2
.
outputSnps
.
consensus
).
toList
,
outputDir
+
"fastas/consensus.snps_only.fasta"
)
val
catConsensusSnps
=
Cat
(
this
,
refVariantSnps
.
consensus
::
samples
.
map
(
_
.
_2
.
outputSnps
.
consensus
).
toList
,
outputDir
+
"fastas/consensus.snps_only.fasta"
)
add
(
catConsensusSnps
)
val
catConsensusVariants
=
Cat
(
this
,
refVariants
.
consensusVariants
::
samples
Output
.
map
(
_
.
_2
.
output
.
consensusVariants
).
toList
,
outputDir
+
"fastas/consensus.variant.fasta"
)
val
catConsensusVariants
=
Cat
(
this
,
refVariants
.
consensusVariants
::
samples
.
map
(
_
.
_2
.
output
.
consensusVariants
).
toList
,
outputDir
+
"fastas/consensus.variant.fasta"
)
add
(
catConsensusVariants
)
val
catConsensusVariantsSnps
=
Cat
(
this
,
refVariantSnps
.
consensusVariants
::
samples
Output
.
map
(
_
.
_2
.
outputSnps
.
consensusVariants
).
toList
,
outputDir
+
"fastas/consensus.variant.snps_only.fasta"
)
val
catConsensusVariantsSnps
=
Cat
(
this
,
refVariantSnps
.
consensusVariants
::
samples
.
map
(
_
.
_2
.
outputSnps
.
consensusVariants
).
toList
,
outputDir
+
"fastas/consensus.variant.snps_only.fasta"
)
add
(
catConsensusVariantsSnps
)
val
seed
:
Int
=
config
(
"seed"
,
default
=
12345
)
...
...
@@ -115,42 +131,20 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
addTreeJobs
(
catVariants
.
output
,
catConsensusVariants
.
output
,
outputDir
+
"trees"
+
File
.
separator
+
"snps_indels"
,
"snps_indels"
)
}
// Called for each sample
def
runSingleSampleJobs
(
sampleID
:
String
)
:
SampleOutput
=
{
val
sampleOutput
=
new
SampleOutput
val
sampleDir
=
globalSampleDir
+
sampleID
+
"/"
sampleOutput
.
libraries
=
runLibraryJobs
(
sampleID
)
sampleOutput
.
output
=
addGenerateFasta
(
sampleID
,
sampleDir
)
sampleOutput
.
outputSnps
=
addGenerateFasta
(
sampleID
,
sampleDir
,
snpsOnly
=
true
)
return
sampleOutput
}
// Called for each run from a sample
def
runSingleLibraryJobs
(
libraryID
:
String
,
sampleID
:
String
)
:
LibraryOutput
=
{
val
libraryOutput
=
new
LibraryOutput
val
runDir
:
String
=
globalSampleDir
+
sampleID
+
"/run_"
+
libraryID
+
"/"
return
libraryOutput
}
def
addGenerateFasta
(
sampleName
:
String
,
outputDir
:
String
,
outputName
:
String
=
null
,
snpsOnly
:
Boolean
=
false
)
:
FastaOutput
=
{
val
bastyGenerateFasta
=
new
BastyGenerateFasta
(
this
)
bastyGenerateFasta
.
outputName
=
if
(
outputName
!=
null
)
outputName
else
sampleName
bastyGenerateFasta
.
inputVcf
=
gatkPipeline
.
multisampleVariantcalling
.
scriptOutput
.
finalVcfFile
if
(
gatkPipeline
.
samples
Output
.
contains
(
sampleName
))
{
bastyGenerateFasta
.
bamFile
=
gatkPipeline
.
samples
Output
(
sampleName
).
v
ariantcalling
.
bamFiles
.
head
if
(
gatkPipeline
.
samples
.
contains
(
sampleName
))
{
bastyGenerateFasta
.
bamFile
=
gatkPipeline
.
samples
(
sampleName
).
gatkV
ariantcalling
.
scriptOutput
.
bamFiles
.
head
}
bastyGenerateFasta
.
outputVariants
=
outputDir
+
bastyGenerateFasta
.
outputName
+
".variants"
+
(
if
(
snpsOnly
)
".snps_only"
else
""
)
+
".fasta"
bastyGenerateFasta
.
outputConsensus
=
outputDir
+
bastyGenerateFasta
.
outputName
+
".consensus"
+
(
if
(
snpsOnly
)
".snps_only"
else
""
)
+
".fasta"
bastyGenerateFasta
.
outputConsensusVariants
=
outputDir
+
bastyGenerateFasta
.
outputName
+
".consensus_variants"
+
(
if
(
snpsOnly
)
".snps_only"
else
""
)
+
".fasta"
bastyGenerateFasta
.
sampleName
=
sampleName
bastyGenerateFasta
.
snpsOnly
=
snpsOnly
add
(
bastyGenerateFasta
)
qscript
.
add
(
bastyGenerateFasta
)
return
FastaOutput
(
bastyGenerateFasta
.
outputVariants
,
bastyGenerateFasta
.
outputConsensus
,
bastyGenerateFasta
.
outputConsensusVariants
)
}
}
...
...
protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala
View file @
91e72434
...
...
@@ -9,13 +9,17 @@ import java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
class
ApplyRecalibration
(
val
root
:
Configurable
)
extends
org
.
broadinstitute
.
gatk
.
queue
.
extensions
.
gatk
.
ApplyRecalibration
with
GatkGeneral
{
scatterCount
=
config
(
"scattercount"
,
default
=
0
)
override
def
afterGraph
{
super
.
afterGraph
if
(
config
.
contains
(
"scattercount"
))
scatterCount
=
config
(
"scattercount"
)
nt
=
Option
(
getThreads
(
3
))
memoryLimit
=
Option
(
nt
.
getOrElse
(
1
)
*
2
)
import
org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode
if
(
mode
==
Mode
.
INDEL
)
ts_filter_level
=
config
(
"ts_filter_level"
,
default
=
99.0
)
else
if
(
mode
==
Mode
.
SNP
)
ts_filter_level
=
config
(
"ts_filter_level"
,
default
=
99.5
)
ts_filter_level
=
config
(
"ts_filter_level"
)
}
}
...
...
@@ -24,11 +28,9 @@ object ApplyRecalibration {
def
apply
(
root
:
Configurable
,
input
:
File
,
output
:
File
,
recal_file
:
File
,
tranches_file
:
File
,
indel
:
Boolean
=
false
)
:
ApplyRecalibration
=
{
val
ar
=
if
(
indel
)
new
ApplyRecalibration
(
root
)
{
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
defaults
++=
Map
(
"ts_filter_level"
->
99.0
)
}
else
new
ApplyRecalibration
(
root
)
{
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
defaults
++=
Map
(
"ts_filter_level"
->
99.5
)
}
ar
.
input
:+=
input
ar
.
recal_file
=
recal_file
...
...
protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala
View file @
91e72434
...
...
@@ -27,11 +27,9 @@ object VariantRecalibrator {
override
def
configPath
:
List
[
String
]
=
(
if
(
indel
)
"indel"
else
"snp"
)
::
super
.
configPath
if
(
indel
)
{
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
INDEL
defaults
++=
Map
(
"ts_filter_level"
->
99.0
)
if
(
config
.
contains
(
"mills"
))
resource
:+=
new
TaggedFile
(
config
(
"mills"
).
asString
,
"known=false,training=true,truth=true,prior=12.0"
)
}
else
{
mode
=
org
.
broadinstitute
.
gatk
.
tools
.
walkers
.
variantrecalibration
.
VariantRecalibratorArgumentCollection
.
Mode
.
SNP
defaults
++=
Map
(
"ts_filter_level"
->
99.5
)
if
(
config
.
contains
(
"hapmap"
))
resource
+:=
new
TaggedFile
(
config
(
"hapmap"
).
asString
,
"known=false,training=true,truth=true,prior=15.0"
)
if
(
config
.
contains
(
"omni"
))
resource
+:=
new
TaggedFile
(
config
(
"omni"
).
asString
,
"known=false,training=true,truth=true,prior=12.0"
)
if
(
config
.
contains
(
"1000G"
))
resource
+:=
new
TaggedFile
(
config
(
"1000G"
).
asString
,
"known=false,training=true,truth=false,prior=10.0"
)
...
...
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
View file @
91e72434
...
...
@@ -9,9 +9,7 @@ import nl.lumc.sasc.biopet.core.MultiSampleQScript
import
nl.lumc.sasc.biopet.core.PipelineCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
htsjdk.samtools.SamReaderFactory
import
nl.lumc.sasc.biopet.pipelines.mapping.Mapping._
import
scala.collection.JavaConversions._
import
java.io.File
import
nl.lumc.sasc.biopet.extensions.gatk.
{
CombineVariants
,
CombineGVCFs
}
import
nl.lumc.sasc.biopet.extensions.picard.AddOrReplaceReadGroups
import
nl.lumc.sasc.biopet.extensions.picard.SamToFastq
...
...
@@ -21,40 +19,139 @@ import org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.utils.commandline.
{
Argument
}
class
GatkPipeline
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
qscript
=>
def
this
()
=
this
(
null
)
@Argument
(
doc
=
"Skip Genotyping step"
,
shortName
=
"skipgenotyping"
,
required
=
false
)
var
skipGenotyping
:
Boolean
=
false
var
skipGenotyping
:
Boolean
=
config
(
"skip_genotyping"
,
default
=
false
)
@Argument
(
doc
=
"Merge gvcfs"
,
shortName
=
"mergegvcfs"
,
required
=
false
)
var
mergeGvcfs
:
Boolean
=
false
/** Merge gvcfs */
var
mergeGvcfs
:
Boolean
=
config
(
"merge_gvcfs"
,
default
=
false
)
@Argument
(
doc
=
"
Joint variantcalling
"
,
shortName
=
"jointVariantCalling"
,
required
=
false
)
/**
Joint variantcalling
*/
var
jointVariantcalling
:
Boolean
=
config
(
"joint_variantcalling"
,
default
=
false
)
@Argument
(
doc
=
"Joint genotyping"
,
shortName
=
"jointGenotyping"
,
required
=
false
)
/** Joint genotyping */
var
jointGenotyping
:
Boolean
=
config
(
"joint_genotyping"
,
default
=
false
)
var
singleSampleCalling
=
config
(
"single_sample_calling"
,
default
=
true
)
var
reference
:
File
=
config
(
"reference"
,
required
=
true
)
var
dbsnp
:
File
=
config
(
"dbsnp"
)
var
gvcfFiles
:
List
[
File
]
=
Nil
var
finalBamFiles
:
List
[
File
]
=
Nil
var
useAllelesOption
:
Boolean
=
config
(
"use_alleles_option"
,
default
=
false
)
val
externalGvcfs
=
config
(
"external_gvcfs_files"
,
default
=
Nil
).
asFileList
/**
* class LibraryOutput extends AbstractLibraryOutput {
* var mappedBamFile: File = _
* var variantcalling: GatkVariantcalling.ScriptOutput = _
* }
*
* class SampleOutput extends AbstractSampleOutput {
* var variantcalling: GatkVariantcalling.ScriptOutput = _
* }
*/
def
makeSample
(
id
:
String
)
=
new
Sample
(
id
)
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
def
makeLibrary
(
id
:
String
)
=
new
Library
(
id
)
class
Library
(
libraryId
:
String
)
extends
AbstractLibrary
(
libraryId
)
{
val
mapping
=
new
Mapping
(
qscript
)
mapping
.
sampleId
=
sampleId
mapping
.
libraryId
=
libraryId
mapping
.
outputDir
=
libDir
+
"/variantcalling/"
/** Library variantcalling */
val
gatkVariantcalling
=
new
GatkVariantcalling
(
qscript
)
gatkVariantcalling
.
sampleID
=
sampleId
gatkVariantcalling
.
outputDir
=
libDir
def
addJobs
()
:
Unit
=
{
val
bamFile
:
Option
[
File
]
=
if
(
config
.
contains
(
"R1"
))
{
mapping
.
input_R1
=
config
(
"R1"
)
mapping
.
input_R2
=
config
(
"R2"
)
mapping
.
init
mapping
.
biopetScript
addAll
(
mapping
.
functions
)
// Add functions of mapping to curent function pool
Some
(
mapping
.
finalBamFile
)
}
else
if
(
config
.
contains
(
"bam"
))
{
var
bamFile
:
File
=
config
(
"bam"
)
if
(!
bamFile
.
exists
)
throw
new
IllegalStateException
(
"Bam in config does not exist, file: "
+
bamFile
)
if
(
config
(
"bam_to_fastq"
,
default
=
false
).
asBoolean
)
{
val
samToFastq
=
SamToFastq
(
qscript
,
bamFile
,
libDir
+
sampleId
+
"-"
+
libraryId
+
".R1.fastq"
,
libDir
+
sampleId
+
"-"
+
libraryId
+
".R2.fastq"
)
samToFastq
.
isIntermediate
=
true
qscript
.
add
(
samToFastq
)
mapping
.
input_R1
=
samToFastq
.
fastqR1
mapping
.
input_R2
=
samToFastq
.
fastqR2
mapping
.
init
mapping
.
biopetScript
addAll
(
mapping
.
functions
)
// Add functions of mapping to curent function pool
Some
(
mapping
.
finalBamFile
)
}
else
{
var
readGroupOke
=
true
val
inputSam
=
SamReaderFactory
.
makeDefault
.
open
(
bamFile
)
val
header
=
inputSam
.
getFileHeader
.
getReadGroups
for
(
readGroup
<-
inputSam
.
getFileHeader
.
getReadGroups
)
{
if
(
readGroup
.
getSample
!=
sampleId
)
logger
.
warn
(
"Sample ID readgroup in bam file is not the same"
)
if
(
readGroup
.
getLibrary
!=
libraryId
)
logger
.
warn
(
"Library ID readgroup in bam file is not the same"
)
if
(
readGroup
.
getSample
!=
sampleId
||
readGroup
.
getLibrary
!=
libraryId
)
readGroupOke
=
false
}
inputSam
.
close
if
(!
readGroupOke
)
{
if
(
config
(
"correct_readgroups"
,
default
=
false
))
{
logger
.
info
(
"Correcting readgroups, file:"
+
bamFile
)
val
aorrg
=
AddOrReplaceReadGroups
(
qscript
,
bamFile
,
new
File
(
libDir
+
sampleId
+
"-"
+
libraryId
+
".bam"
))
aorrg
.
RGID
=
sampleId
+
"-"
+
libraryId
aorrg
.
RGLB
=
libraryId
aorrg
.
RGSM
=
sampleId
aorrg
.
isIntermediate
=
true
qscript
.
add
(
aorrg
)
bamFile
=
aorrg
.
output
}
else
throw
new
IllegalStateException
(
"Sample readgroup and/or library of input bamfile is not correct, file: "
+
bamFile
+
"\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this"
)
}
addAll
(
BamMetrics
(
qscript
,
bamFile
,
libDir
+
"metrics/"
).
functions
)
Some
(
bamFile
)
}
}
else
{
logger
.
error
(
"Sample: "
+
sampleId
+
": No R1 found for run: "
+
libraryId
)
None
}
class
LibraryOutput
extends
AbstractLibraryOutput
{
var
mappedBamFile
:
File
=
_
var
variantcalling
:
GatkVariantcalling.ScriptOutput
=
_
}
if
(
bamFile
.
isDefined
)
{
gatkVariantcalling
.
inputBams
=
List
(
bamFile
.
get
)
gatkVariantcalling
.
variantcalling
=
config
(
"library_variantcalling"
,
default
=
false
)
gatkVariantcalling
.
preProcesBams
=
true
gatkVariantcalling
.
init
gatkVariantcalling
.
biopetScript
addAll
(
gatkVariantcalling
.
functions
)
}
}
}
/** sample variantcalling */
val
gatkVariantcalling
=
new
GatkVariantcalling
(
qscript
)
gatkVariantcalling
.
sampleID
=
sampleId
gatkVariantcalling
.
outputDir
=
sampleDir
+
"/variantcalling/"
class
SampleOutput
extends
AbstractSampleOutput
{
var
variantcalling
:
GatkVariantcalling.ScriptOutput
=
_
def
addJobs
()
:
Unit
=
{
runLibraryJobs
()
gatkVariantcalling
.
inputBams
=
libraries
.
map
(
_
.
_2
.
mapping
.
finalBamFile
).
toList
gatkVariantcalling
.
preProcesBams
=
false
if
(!
singleSampleCalling
)
{
gatkVariantcalling
.
useHaplotypecaller
=
false
gatkVariantcalling
.
useUnifiedGenotyper
=
false
}
gatkVariantcalling
.
init
gatkVariantcalling
.
biopetScript
addAll
(
gatkVariantcalling
.
functions
)
}
}
def
init
()
{
if
(
config
.
contains
(
"gvcfFiles"
))
for
(
file
<-
config
(
"gvcfFiles"
).
asList
)
gvcfFiles
:+=
file
.
toString
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on gatk module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
}
...
...
@@ -68,11 +165,11 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
runSamplesJobs
//SampleWide jobs
if
(
mergeGvcfs
&&
gvcfFi
les
.
size
>
0
)
{
val
gvcfFiles
:
List
[
File
]
=
if
(
mergeGvcfs
&&
externalGvcfs
.
size
+
samp
les
.
size
>
1
)
{
val
newFile
=
outputDir
+
"merged.gvcf.vcf.gz"
add
(
CombineGVCFs
(
this
,
gvcfFile
s
,
newFile
))
gvcfFiles
=
List
(
newFile
)
}
add
(
CombineGVCFs
(
this
,
externalGvcfs
++
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
gvcfFile
)
,
newFile
))
List
(
newFile
)
}
else
externalGvcfs
++
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
gvcfFile
)
if
(!
skipGenotyping
&&
gvcfFiles
.
size
>
0
)
{
if
(
jointGenotyping
)
{
...
...
@@ -87,11 +184,8 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
else
logger
.
warn
(
"No gVCFs to genotype"
)
if
(
jointVariantcalling
)
{
val
allBamfiles
=
for
(
(
sampleID
,
sampleOutput
)
<-
samplesOutput
;
file
<-
sampleOutput
.
variantcalling
.
bamFiles
)
yield
file
val
allRawVcfFiles
=
for
((
sampleID
,
sampleOutput
)
<-
samplesOutput
)
yield
sampleOutput
.
variantcalling
.
rawFilterVcfFile
val
allBamfiles
=
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
bamFiles
).
toList
.
fold
(
Nil
)(
_
++
_
)
val
allRawVcfFiles
=
samples
.
map
(
_
.
_2
.
gatkVariantcalling
.
scriptOutput
.
rawVcfFile
).
filter
(
_
!=
null
).
toList
val
gatkVariantcalling
=
new
GatkVariantcalling
(
this
)
{
override
def
configName
=
"gatkvariantcalling"
...
...
@@ -116,7 +210,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
if
(
config
(
"inputtype"
,
default
=
"dna"
).
asString
!=
"rna"
&&
config
(
"recalibration"
,
default
=
false
).
asBoolean
)
{
val
recalibration
=
new
GatkVariantRecalibration
(
this
)
recalibration
.
inputVcf
=
multisampleVariantcalling
.
scriptOutput
.
finalVcfFile
recalibration
.
bamFiles
=
fin
alBam
F
iles
recalibration
.
bamFiles
=
al
l
Bam
f
iles
recalibration
.
outputDir
=
outputDir
+
"recalibration/"
recalibration
.
init
recalibration
.
biopetScript
...
...
@@ -124,6 +218,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
}
/*
// Called for each sample
def runSingleSampleJobs(sampleID: String): SampleOutput = {
val sampleOutput = new SampleOutput
...
...
@@ -246,6 +341,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
return libraryOutput
}
*/
}
object
GatkPipeline
extends
PipelineCommand
protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
View file @
91e72434
...
...
@@ -11,6 +11,7 @@ import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter, MergeAlleles }
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.gatk.
{
AnalyzeCovariates
,
BaseRecalibrator
,
GenotypeGVCFs
,
HaplotypeCaller
,
IndelRealigner
,
PrintReads
,
RealignerTargetCreator
,
SelectVariants
,
CombineVariants
,
UnifiedGenotyper
}
import
nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
}
...
...
@@ -68,7 +69,8 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
if
(
files
.
isEmpty
)
throw
new
IllegalStateException
(
"Files can't be empty"
)
if
(!
doublePreProces
.
get
)
return
files
val
markDup
=
MarkDuplicates
(
this
,
files
,
new
File
(
outputDir
+
outputName
+
".dedup.bam"
))
add
(
markDup
,
isIntermediate
=
useIndelRealigner
)
markDup
.
isIntermediate
=
useIndelRealigner
add
(
markDup
)
if
(
useIndelRealigner
)
{
List
(
addIndelRealign
(
markDup
.
output
,
outputDir
,
isIntermediate
=
false
))
}
else
{
...
...
@@ -141,11 +143,13 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
add
(
m2v
)
scriptOutput
.
rawVcfFile
=
m2v
.
output
val
vcfFilter
=
new
VcfFilter
(
this
)
vcfFilter
.
defaults
++=
Map
(
"min_sample_depth"
->
8
,
"min_alternate_depth"
->
2
,
"min_samples_pass"
->
1
,
"filter_ref_calls"
->
true
)
val
vcfFilter
=
new
VcfFilter
(
this
)
{
override
def
defaults
=
ConfigUtils
.
mergeMaps
(
Map
(
"min_sample_depth"
->
8
,
"min_alternate_depth"
->
2
,
"min_samples_pass"
->
1
,
"filter_ref_calls"
->
true
),
super
.
defaults
)
}
vcfFilter
.
inputVcf
=
m2v
.
output
vcfFilter
.
outputVcf
=
this
.
swapExt
(
outputDir
,
m2v
.
output
,
".vcf"
,
".filter.vcf.gz"
)
add
(
vcfFilter
)
...
...
@@ -157,7 +161,8 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
// Allele mode
if
(
useAllelesOption
.
get
)
{
val
mergeAlleles
=
MergeAlleles
(
this
,
mergeList
.
toList
,
outputDir
+
"raw.allele__temp_only.vcf.gz"
)
add
(
mergeAlleles
,
isIntermediate
=
true
)
mergeAlleles
.
isIntermediate
=
true
add
(
mergeAlleles
)
if
(
useHaplotypecaller
.
get
)
{
val
hcAlleles
=
new
HaplotypeCaller
(
this
)
...
...
@@ -187,7 +192,8 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
val
sv
=
SelectVariants
(
this
,
input
,
output
)
sv
.
excludeFiltered
=
true
sv
.
excludeNonVariants
=
true
add
(
sv
,
isIntermediate
=
true
)
sv
.
isIntermediate
=
true
add
(
sv
)
sv
.
out
}
...
...
@@ -200,10 +206,12 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
def
addIndelRealign
(
inputBam
:
File
,
dir
:
String
,
isIntermediate
:
Boolean
=
true
)
:
File
=
{
val
realignerTargetCreator
=
RealignerTargetCreator
(
this
,
inputBam
,
dir
)
add
(
realignerTargetCreator
,
isIntermediate
=
true
)
realignerTargetCreator
.
isIntermediate
=
true
add
(
realignerTargetCreator
)
val
indelRealigner
=
IndelRealigner
.
apply
(
this
,
inputBam
,
realignerTargetCreator
.
out
,
dir
)
add
(
indelRealigner
,
isIntermediate
=
isIntermediate
)
indelRealigner
.
isIntermediate
=
isIntermediate
add
(
indelRealigner
)
return
indelRealigner
.
o
}
...
...
@@ -227,7 +235,8 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
val
printReads
=
PrintReads
(
this
,
inputBam
,
swapExt
(
dir
,
inputBam
,
".bam"
,
".baserecal.bam"
))
printReads
.
BQSR
=
baseRecalibrator
.
o
add
(
printReads
,
isIntermediate
=
isIntermediate
)
printReads
.
isIntermediate
=
isIntermediate
add
(
printReads
)
return
printReads
.
o
}
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala
View file @
91e72434
...
...
@@ -45,7 +45,7 @@ trait MultiSampleQScript extends BiopetQScript {
abstract
class
AbstractLibrary
(
val
libraryId
:
String
)
{
/** Overrules config of qscript with default sample and default library */
val
config
=
new
ConfigFunctions
(
defaultSample
=
sampleId
,
defaultLibrary
=
libraryId
)
/** Adds the library jobs */
final
def
add
()
:
Unit
=
{
currentSample
=
Some
(
sampleId
)
...
...
@@ -116,10 +116,10 @@ trait MultiSampleQScript extends BiopetQScript {
/**
* Factory method for Sample class
* @param id SampleId
* @param id SampleId
* @return Sample class
*/
def
makeSample
(
id
:
String
)
:
Sample
def
makeSample
(
id
:
String
)
:
Sample
/** Stores all samples */
val
samples
:
Map
[
String
,
Sample
]
=
sampleIds
.
map
(
id
=>
id
->
makeSample
(
id
)).
toMap
...
...
public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala
View file @
91e72434
...
...
@@ -79,7 +79,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
flexiprep
.
input_R1
=
inputFastq
flexiprep
.
init
flexiprep
.
biopetScript
addAll
(
flexiprep
.
functions
)
qscript
.
addAll
(
flexiprep
.
functions
)
val
flexiprepOutput
=
for
((
key
,
file
)
<-
flexiprep
.
outputFiles
if
key
.
endsWith
(
"output_R1"
))
yield
file
val
pf
=
new
PrefixFastq
(
qscript
)
...
...
@@ -87,13 +87,13 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
pf
.
outputFastq
=
prefixFastq
pf
.
prefixSeq
=
config
(
"sage_tag"
,
default
=
"CATG"
)
pf
.
deps
+:=
flexiprep
.
outputFiles
(
"fastq_input_R1"
)
add
(
pf
)
qscript
.
add
(
pf
)
mapping
.
input_R1
=
pf
.
outputFastq
mapping
.
outputDir
=
libDir
mapping
.
init
mapping
.
biopetScript
addAll
(
mapping
.
functions
)
qscript
.
addAll
(
mapping
.
functions
)
if
(
config
(
"library_counts"
,
default
=
false
).
asBoolean
)
{
addBedtoolsCounts
(
mapping
.
finalBamFile
,
sampleId
+
"-"
+
libraryId
,
libDir
)
...
...
@@ -110,13 +110,13 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
val
bamFile
:
File
=
if
(
libraryBamfiles
.
size
==
1
)
libraryBamfiles
.
head
else
if
(
libraryBamfiles
.
size
>
1
)
{
val
mergeSamFiles
=
MergeSamFiles
(
qscript
,
libraryBamfiles
,
sampleDir
)
add
(
mergeSamFiles
)
qscript
.
add
(
mergeSamFiles
)
mergeSamFiles
.
output
}
else
null
val
fastqFile
:
File
=
if
(
libraryFastqFiles
.
size
==
1
)
libraryFastqFiles
.
head
else
if
(
libraryFastqFiles
.
size
>
1
)
{
val
cat
=
Cat
(
qscript
,
libraryFastqFiles
,
sampleDir
+
sampleId
+
".fastq"
)
add
(
cat
)
qscript
.
add
(
cat
)
cat
.
output
}
else
null
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment