Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
biopet.biopet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
biopet.biopet
Commits
7278bcf2
Commit
7278bcf2
authored
Apr 11, 2016
by
Peter van 't Hof
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added scattering on annotation
parent
180bb97e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
133 additions
and
42 deletions
+133
-42
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
.../nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
+62
-0
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
...n/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
+71
-42
No files found.
public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
0 → 100644
View file @
7278bcf2
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.extensions.gatk
import
java.io.File
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
* Extension for SelectVariants from GATK
*
* Created by pjvan_thof on 2/26/15.
*/
class SelectVariants(val root: Configurable) extends Gatk {
  val analysisType = "SelectVariants"

  /** Files handed to the tool; each entry becomes one `-V` argument in [[cmdLine]]. */
  @Input(doc = "", required = true)
  var inputFiles: List[File] = Nil

  /** File handed to the tool with `-o`; must be assigned before [[beforeGraph]] runs. */
  @Output(doc = "", required = true)
  var outputFile: File = null

  /** When true, `--excludeNonVariants` is appended to the command line. */
  var excludeNonVariants: Boolean = false

  /** Optional name per input file; a file with an entry is passed as `-V:<name>` instead of `-V`. */
  var inputMap: Map[File, String] = Map()

  /**
   * Registers an input file together with the name to use for it on the command line.
   *
   * @param file input file, appended to [[inputFiles]]
   * @param name name stored for `file` in [[inputMap]]
   */
  def addInput(file: File, name: String): Unit = {
    inputFiles :+= file
    inputMap += file -> name
  }

  /**
   * Registers the companion `.tbi` files (presumably tabix indexes - confirm) of
   * every `.vcf.gz` file: the one next to the output as an extra output, the ones
   * next to the inputs as extra dependencies.
   *
   * @throws IllegalArgumentException when [[outputFile]] has not been set
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()

    // Fail with a clear message instead of a bare NullPointerException below.
    if (outputFile == null)
      throw new IllegalArgumentException("SelectVariants: outputFile is not set")

    // One suffix check for inputs and output alike; the input side used to test
    // "vcf.gz" without the leading dot and so also matched names like "foovcf.gz".
    def isGzippedVcf(file: File): Boolean = file.getName.endsWith(".vcf.gz")
    def tbiFor(file: File): File = new File(file.getAbsolutePath + ".tbi")

    if (isGzippedVcf(outputFile)) outputFiles :+= tbiFor(outputFile)

    deps :::= inputFiles.filter(isGzippedVcf).map(tbiFor)
    // The same input may be added more than once; keep every dependency only once.
    deps = deps.distinct
  }

  /**
   * Builds the command line: the parent's command line, followed by one
   * `-V` / `-V:<name>` argument per input file, the `-o` argument and, when
   * enabled, `--excludeNonVariants`.
   */
  override def cmdLine: String =
    super.cmdLine +
      inputFiles.map { file =>
        inputMap.get(file) match {
          case Some(name) => required("-V:" + name, file)
          case None       => required("-V", file)
        }
      }.mkString +
      required("-o", outputFile) +
      conditional(excludeNonVariants, "--excludeNonVariants")
}
public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
View file @
7278bcf2
...
...
@@ -15,15 +15,19 @@
*/
package
nl.lumc.sasc.biopet.pipelines.toucan
import
java.io.File
import
nl.lumc.sasc.biopet.core._
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView
import
nl.lumc.sasc.biopet.extensions.bedtools.
{
BedtoolsIntersect
,
BedtoolsMerge
}
import
nl.lumc.sasc.biopet.extensions.manwe.
{
ManweAnnotateVcf
,
ManweSamplesImport
}
import
nl.lumc.sasc.biopet.extensions.tools.
{
GvcfToBed
,
VcfWithVcf
,
VepNormalizer
}
import
nl.lumc.sasc.biopet.extensions.
{
Bgzip
,
Ln
,
VariantEffectPredictor
}
import
nl.lumc.sasc.biopet.extensions.bedtools.
{
BedtoolsIntersect
,
BedtoolsMerge
}
import
nl.lumc.sasc.biopet.extensions.gatk.
{
CatVariants
,
SelectVariants
}
import
nl.lumc.sasc.biopet.extensions.manwe.
{
ManweAnnotateVcf
,
ManweSamplesImport
}
import
nl.lumc.sasc.biopet.extensions.tools.
{
GvcfToBed
,
VcfWithVcf
,
VepNormalizer
}
import
nl.lumc.sasc.biopet.extensions.
{
Bgzip
,
Ln
,
VariantEffectPredictor
}
import
nl.lumc.sasc.biopet.utils.VcfUtils
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import
org.broadinstitute.gatk.queue.QScript
/**
...
...
@@ -40,6 +44,9 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
@Input
(
doc
=
"Input GVCF file"
,
shortName
=
"gvcf"
,
required
=
false
)
var
inputGvcf
:
Option
[
File
]
=
None
lazy
val
gonlVcfFile
:
Option
[
File
]
=
config
(
"gonl_vcf"
)
lazy
val
exacVcfFile
:
Option
[
File
]
=
config
(
"exac_vcf"
)
var
sampleIds
:
List
[
String
]
=
Nil
def
init
()
:
Unit
=
{
inputFiles
:+=
new
InputFile
(
inputVCF
)
...
...
@@ -63,47 +70,69 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
case
_
=>
throw
new
IllegalArgumentException
(
"You have not specified a GVCF file"
)
}
}
else
inputVCF
val
vep
=
new
VariantEffectPredictor
(
this
)
vep
.
input
=
useVcf
vep
.
output
=
new
File
(
outputDir
,
inputVCF
.
getName
.
stripSuffix
(
".gz"
).
stripSuffix
(
".vcf"
)
+
".vep.vcf"
)
vep
.
isIntermediate
=
true
add
(
vep
)
addSummarizable
(
vep
,
"variant_effect_predictor"
)
val
normalizer
=
new
VepNormalizer
(
this
)
normalizer
.
inputVCF
=
vep
.
output
normalizer
.
outputVcf
=
swapExt
(
outputDir
,
vep
.
output
,
".vcf"
,
".normalized.vcf.gz"
)
add
(
normalizer
)
// Optional annotation steps, depending on whether certain files exist in the config
val
gonlVcfFile
:
Option
[
File
]
=
config
(
"gonl_vcf"
)
val
exacVcfFile
:
Option
[
File
]
=
config
(
"exac_vcf"
)
var
outputFile
=
normalizer
.
outputVcf
gonlVcfFile
match
{
case
Some
(
gonlFile
)
=>
val
vcfWithVcf
=
new
VcfWithVcf
(
this
)
vcfWithVcf
.
input
=
outputFile
vcfWithVcf
.
secondaryVcf
=
gonlFile
vcfWithVcf
.
output
=
swapExt
(
outputDir
,
normalizer
.
outputVcf
,
".vcf.gz"
,
".gonl.vcf.gz"
)
vcfWithVcf
.
fields
::=
(
"AF"
,
"AF_gonl"
,
None
)
add
(
vcfWithVcf
)
outputFile
=
vcfWithVcf
.
output
case
_
=>
val
outputVcfFiles
=
BedRecordList
.
fromReference
(
referenceFasta
())
.
scatter
(
config
(
"bin_size"
,
default
=
50000000
))
.
allRecords
.
map
{
region
=>
val
chunkName
=
s
"${region.chr}-${region.start}-${region.end}"
val
chunkDir
=
new
File
(
outputDir
,
"chunk"
+
File
.
separator
+
chunkName
)
val
sv
=
new
SelectVariants
(
this
)
sv
.
inputFiles
:+=
useVcf
sv
.
outputFile
=
new
File
(
chunkDir
,
chunkName
+
".vcf.gz"
)
sv
.
isIntermediate
=
true
add
(
sv
)
val
vep
=
new
VariantEffectPredictor
(
this
)
vep
.
input
=
sv
.
outputFile
vep
.
output
=
new
File
(
chunkDir
,
chunkName
+
".vep.vcf"
)
vep
.
isIntermediate
=
true
add
(
vep
)
addSummarizable
(
vep
,
"variant_effect_predictor"
)
val
normalizer
=
new
VepNormalizer
(
this
)
normalizer
.
inputVCF
=
vep
.
output
normalizer
.
outputVcf
=
new
File
(
chunkDir
,
chunkName
+
".normalized.vcf.gz"
)
add
(
normalizer
)
var
outputFile
=
normalizer
.
outputVcf
gonlVcfFile
match
{
case
Some
(
gonlFile
)
=>
val
vcfWithVcf
=
new
VcfWithVcf
(
this
)
vcfWithVcf
.
input
=
outputFile
vcfWithVcf
.
secondaryVcf
=
gonlFile
vcfWithVcf
.
output
=
swapExt
(
chunkDir
,
normalizer
.
outputVcf
,
".vcf.gz"
,
".gonl.vcf.gz"
)
vcfWithVcf
.
fields
::=
(
"AF"
,
"AF_gonl"
,
None
)
add
(
vcfWithVcf
)
outputFile
=
vcfWithVcf
.
output
case
_
=>
}
exacVcfFile
match
{
case
Some
(
exacFile
)
=>
val
vcfWithVcf
=
new
VcfWithVcf
(
this
)
vcfWithVcf
.
input
=
outputFile
vcfWithVcf
.
secondaryVcf
=
exacFile
vcfWithVcf
.
output
=
swapExt
(
chunkDir
,
outputFile
,
".vcf.gz"
,
".exac.vcf.gz"
)
vcfWithVcf
.
fields
::=
(
"AF"
,
"AF_exac"
,
None
)
add
(
vcfWithVcf
)
outputFile
=
vcfWithVcf
.
output
case
_
=>
}
outputFile
}
exacVcfFile
match
{
case
Some
(
exacFile
)
=>
val
vcfWithVcf
=
new
VcfWithVcf
(
this
)
vcfWithVcf
.
input
=
outputFile
vcfWithVcf
.
secondaryVcf
=
exacFile
vcfWithVcf
.
output
=
swapExt
(
outputDir
,
outputFile
,
".vcf.gz"
,
".exac.vcf.gz"
)
vcfWithVcf
.
fields
::=
(
"AF"
,
"AF_exac"
,
None
)
add
(
vcfWithVcf
)
outputFile
=
vcfWithVcf
.
output
case
_
=>
val
cv
=
new
CatVariants
(
this
)
cv
.
inputFiles
=
outputVcfFiles
.
toList
cv
.
outputFile
=
(
gonlVcfFile
,
exacVcfFile
)
match
{
case
(
Some
(
_
),
Some
(
_
))
=>
swapExt
(
outputDir
,
inputVCF
,
".vcf.gz"
,
".vep.normalized.gonl.exac.vcf.gz"
)
case
(
Some
(
_
),
_
)
=>
swapExt
(
outputDir
,
inputVCF
,
".vcf.gz"
,
".vep.normalized.gonl.vcf.gz"
)
case
(
_
,
Some
(
_
))
=>
swapExt
(
outputDir
,
inputVCF
,
".vcf.gz"
,
".vep.normalized.exac.vcf.gz"
)
case
_
=>
swapExt
(
outputDir
,
inputVCF
,
".vcf.gz"
,
".vep.normalized.vcf.gz"
)
}
add
(
cv
)
addSummaryJobs
()
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment