Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
biopet.biopet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
biopet.biopet
Commits
a615482d
Commit
a615482d
authored
Nov 18, 2015
by
Sander Bollen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rewrite of varda implementation. It now must take a normal call VCF, *and* a GVCF.
parent
c01271b8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
102 deletions
+80
-102
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala
...opet/pipelines/toucan/ManweActivateAfterAnnotImport.scala
+1
-1
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala
.../biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala
+1
-1
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
...n/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
+78
-100
No files found.
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala
View file @
a615482d
...
...
@@ -26,7 +26,7 @@ class ManweActivateAfterAnnotImport(root: Configurable,
if
(
imported
.
output
.
exists
())
{
val
reader
=
Source
.
fromFile
(
imported
.
output
)
this
.
uri
=
reader
.
getLines
().
toList
.
head
this
.
uri
=
reader
.
getLines
().
toList
.
head
.
split
(
' '
).
last
reader
.
close
()
}
else
{
this
.
uri
=
""
...
...
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala
View file @
a615482d
...
...
@@ -24,7 +24,7 @@ class ManweDownloadAfterAnnotate(root: Configurable,
if
(
annotate
.
output
.
exists
())
{
val
reader
=
Source
.
fromFile
(
annotate
.
output
)
this
.
uri
=
reader
.
getLines
().
toList
.
head
this
.
uri
=
reader
.
getLines
().
toList
.
head
.
split
(
' '
).
last
reader
.
close
()
}
else
{
this
.
uri
=
""
...
...
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
View file @
a615482d
...
...
@@ -18,11 +18,12 @@ package nl.lumc.sasc.biopet.pipelines.toucan
import
java.io.
{
File
,
PrintWriter
}
import
nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView
import
nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsIntersect
import
nl.lumc.sasc.biopet.extensions.manwe.
{
ManweSamplesImport
,
ManweAnnotateVcf
,
ManweDataSourcesAnnotate
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.core._
import
nl.lumc.sasc.biopet.extensions.
{
Ln
,
Gzip
,
VariantEffectPredictor
}
import
nl.lumc.sasc.biopet.extensions.
{
Bgzip
,
Ln
,
Gzip
,
VariantEffectPredictor
}
import
nl.lumc.sasc.biopet.extensions.tools.
{
GvcfToBed
,
VcfFilter
,
VcfWithVcf
,
VepNormalizer
}
import
nl.lumc.sasc.biopet.utils.
{
VcfUtils
,
ConfigUtils
}
import
org.broadinstitute.gatk.queue.QScript
...
...
@@ -55,8 +56,15 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
//defaults ++= Map("varianteffectpredictor" -> Map("everything" -> true))
def
biopetScript
()
:
Unit
=
{
val
doVarda
=
config
(
"use_varda"
,
default
=
false
)
val
useVcf
:
File
=
if
(
doVarda
)
varda
(
inputVCF
)
else
inputVCF
val
doVarda
:
Boolean
=
config
(
"use_varda"
,
default
=
false
)
val
useVcf
:
File
=
if
(
doVarda
)
{
val
gVcfRoot
:
Option
[
File
]
=
config
(
"varda_gvcf"
)
val
gVcf
=
gVcfRoot
match
{
case
Some
(
s
)
=>
s
case
_
=>
throw
new
IllegalArgumentException
(
"You have not specified a GVCF file"
)
}
varda
(
inputVCF
,
gVcf
)
}
else
inputVCF
val
vep
=
new
VariantEffectPredictor
(
this
)
vep
.
input
=
useVcf
vep
.
output
=
new
File
(
outputDir
,
inputVCF
.
getName
.
stripSuffix
(
".gz"
).
stripSuffix
(
".vcf"
)
+
".vep.vcf"
)
...
...
@@ -99,125 +107,95 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
}
}
/**
* Performs the varda import and activate for one sample
* @param sampleID the sampleID to be used
* @param inputVcf the input VCF
* @param gVCF the gVCF for coverage
* @param annotation: ManweDownloadAnnotateVcf object of annotated vcf
* @return
*/
def
importAndActivateSample
(
sampleID
:
String
,
inputVcf
:
File
,
gVCF
:
File
,
annotation
:
ManweAnnotateVcf
)
:
ManweActivateAfterAnnotImport
=
{
val
minGQ
:
Int
=
config
(
"minimum_genome_quality"
,
default
=
20
,
submodule
=
"manwe"
)
val
isPublic
:
Boolean
=
config
(
"varda_is_public"
,
default
=
true
,
submodule
=
"manwe"
)
val
bedTrack
=
new
GvcfToBed
(
this
)
bedTrack
.
inputVcf
=
gVCF
bedTrack
.
outputBed
=
swapExt
(
outputDir
,
gVCF
,
".vcf.gz"
,
s
""".$sampleID.bed"""
)
bedTrack
.
minQuality
=
minGQ
bedTrack
.
isIntermediate
=
true
bedTrack
.
sample
=
Some
(
sampleID
)
add
(
bedTrack
)
val
bgzippedBed
=
new
Bgzip
(
this
)
bgzippedBed
.
input
=
List
(
bedTrack
.
outputBed
)
bgzippedBed
.
output
=
swapExt
(
outputDir
,
bedTrack
.
outputBed
,
".bed"
,
".bed.gz"
)
add
(
bgzippedBed
)
val
singleVcf
=
new
BcftoolsView
(
this
)
singleVcf
.
input
=
inputVCF
singleVcf
.
output
=
swapExt
(
outputDir
,
inputVCF
,
".vcf.gz"
,
s
""".$sampleID.vcf.gz"""
)
singleVcf
.
samples
=
List
(
sampleID
)
singleVcf
.
minAC
=
Some
(
1
)
singleVcf
.
isIntermediate
=
true
add
(
singleVcf
)
val
intersected
=
new
BedtoolsIntersect
(
this
)
intersected
.
input
=
singleVcf
.
output
intersected
.
intersectFile
=
bgzippedBed
.
output
intersected
.
output
=
swapExt
(
outputDir
,
singleVcf
.
output
,
".vcf.gz"
,
".intersected.vcf.gz"
)
add
(
intersected
)
val
imported
=
new
ManweSamplesImport
(
this
)
imported
.
vcfs
=
List
(
intersected
.
output
)
imported
.
beds
=
List
(
bgzippedBed
.
output
)
imported
.
name
=
Some
(
sampleID
)
imported
.
public
=
isPublic
imported
.
waitToComplete
=
false
imported
.
isIntermediate
=
true
imported
.
output
=
swapExt
(
outputDir
,
intersected
.
output
,
".vcf.gz"
,
".manwe.import"
)
add
(
imported
)
val
active
=
new
ManweActivateAfterAnnotImport
(
this
,
annotation
,
imported
)
active
.
output
=
swapExt
(
outputDir
,
imported
.
output
,
".import"
,
".activated"
)
add
(
active
)
active
}
/**
* Perform varda analysis
* @param vcf input vcf
* @param gVcf The gVCF to be used for coverage calculations
* @return return vcf
*/
def
varda
(
vcf
:
File
)
:
File
=
{
//TODO: How to enforce use of gVCFs?
/**
* By default we only want to annotate variant calls
* not reference calls
* TODO: this does mean integration tests will have to take into account that not all records will be returned
*/
val
annotateJustVariants
=
config
(
"annotate_only_variants"
,
default
=
true
,
submodule
=
"manwe"
)
val
useVcf
=
if
(
annotateJustVariants
)
{
val
filter
=
new
VcfFilter
(
this
)
filter
.
inputVcf
=
vcf
filter
.
outputVcf
=
swapExt
(
outputDir
,
vcf
,
".vcf.gz"
,
".calls.vcf.gz"
)
filter
.
filterRefCalls
=
true
add
(
filter
)
filter
.
outputVcf
}
else
{
vcf
}
def
varda
(
vcf
:
File
,
gVcf
:
File
)
:
File
=
{
//TODO: add groups!!! Need sample-specific group tags for this
val
splits
=
sampleIds
.
map
(
x
=>
{
val
view
=
new
BcftoolsView
(
this
)
view
.
input
=
vcf
view
.
output
=
swapExt
(
outputDir
,
vcf
,
".vcf.gz"
,
s
"$x.vcf.gz"
)
view
.
samples
=
List
(
x
)
view
.
minAC
=
Some
(
0
)
add
(
view
)
view
})
val
minGQ
=
config
(
"minimum_genome_quality"
,
default
=
20
,
submodule
=
"manwe"
)
val
annotationQueries
:
List
[
String
]
=
config
(
"annotation_queries"
,
default
=
Nil
,
submodule
=
"manwe"
)
val
isPublic
:
Boolean
=
config
(
"varda_is_public"
,
default
=
true
,
submodule
=
"manwe"
)
val
rawFilteredVcfs
=
splits
.
map
(
x
=>
{
val
filter
=
new
VcfFilter
(
this
)
filter
.
inputVcf
=
x
.
output
filter
.
outputVcf
=
swapExt
(
outputDir
,
x
.
output
,
".vcf.gz"
,
".raw.filtered.vcf.gz"
)
filter
.
minGenomeQuality
=
minGQ
add
(
filter
)
filter
})
/**
* Bed tracks need to be created on the whole genome
* Only calls must be uploaded to varda in vcfs
*/
val
filteredVcfs
=
rawFilteredVcfs
.
map
(
x
=>
{
val
filter
=
new
VcfFilter
(
this
)
filter
.
inputVcf
=
x
.
outputVcf
filter
.
outputVcf
=
swapExt
(
outputDir
,
x
.
outputVcf
,
".raw.filtered.vcf.gz"
,
".filtered.vcf.gz"
)
filter
.
filterRefCalls
=
true
add
(
filter
)
filter
})
val
bedTracks
=
rawFilteredVcfs
.
map
(
x
=>
{
val
bed
=
new
GvcfToBed
(
this
)
bed
.
inputVcf
=
x
.
outputVcf
bed
.
outputBed
=
swapExt
(
outputDir
,
x
.
outputVcf
,
".vcf.gz"
,
".bed"
)
bed
.
minQuality
=
minGQ
add
(
bed
)
bed
})
val
zippedBedTracks
=
bedTracks
.
map
(
x
=>
{
val
gzip
=
new
Gzip
(
this
)
gzip
.
input
=
List
(
x
.
outputBed
)
gzip
.
output
=
swapExt
(
outputDir
,
x
.
outputBed
,
".bed"
,
".bed.gz"
)
add
(
gzip
)
gzip
})
//TODO: add groups!!! Need sample-specific group tags for this
val
annotate
=
new
ManweAnnotateVcf
(
this
)
annotate
.
vcf
=
useV
cf
annotate
.
vcf
=
v
cf
if
(
annotationQueries
.
nonEmpty
)
{
annotate
.
queries
=
annotationQueries
}
annotate
.
waitToComplete
=
true
annotate
.
output
=
swapExt
(
outputDir
,
vcf
,
".vcf.gz"
,
".tmp.annot"
)
annotate
.
output
=
swapExt
(
outputDir
,
vcf
,
".vcf.gz"
,
".manwe.annot"
)
annotate
.
isIntermediate
=
true
add
(
annotate
)
val
annotatedVcf
=
new
ManweDownloadAfterAnnotate
(
this
,
annotate
)
annotatedVcf
.
output
=
swapExt
(
outputDir
,
annotate
.
output
,
".
tmp.annot"
,
"tmp
.annot.vcf.gz"
)
annotatedVcf
.
output
=
swapExt
(
outputDir
,
annotate
.
output
,
".
manwe.annot"
,
"manwe
.annot.vcf.gz"
)
add
(
annotatedVcf
)
val
imports
=
for
(
(
sample
:
String
,
bed
,
vcf
)
<-
(
sampleIds
,
zippedBedTracks
,
filteredVcfs
).
zipped
)
yield
{
val
importing
=
new
ManweSamplesImport
(
this
)
importing
.
beds
=
List
(
bed
.
output
)
importing
.
vcfs
=
List
(
vcf
.
outputVcf
)
importing
.
name
=
Some
(
sample
)
importing
.
waitToComplete
=
true
importing
.
output
=
swapExt
(
outputDir
,
vcf
.
outputVcf
,
".vcf.gz"
,
".tmp.import"
)
importing
.
public
=
isPublic
add
(
importing
)
importing
}
val
activates
=
imports
.
map
(
x
=>
{
val
active
=
new
ManweActivateAfterAnnotImport
(
this
,
annotate
,
x
)
active
.
output
=
swapExt
(
outputDir
,
x
.
output
,
".tmp.import"
,
".tmp.activated"
)
add
(
active
)
active
})
val
activates
=
sampleIds
map
{
x
=>
importAndActivateSample
(
x
,
vcf
,
gVcf
,
annotate
)
}
val
finalLn
=
new
Ln
(
this
)
activates
.
foreach
(
x
=>
finalLn
.
deps
:+=
x
.
output
)
finalLn
.
input
=
annotatedVcf
.
output
finalLn
.
output
=
swapExt
(
outputDir
,
annotatedVcf
.
output
,
"
tmp
.annot.vcf.gz"
,
".varda_annotated.vcf.gz"
)
finalLn
.
output
=
swapExt
(
outputDir
,
annotatedVcf
.
output
,
"
manwe
.annot.vcf.gz"
,
".varda_annotated.vcf.gz"
)
finalLn
.
relative
=
true
add
(
finalLn
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment