Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
83af6688
Commit
83af6688
authored
Apr 06, 2017
by
akaljuvee
Browse files
changes Bowo and Peter suggested
parent
ec7fe052
Changes
1
Hide whitespace changes
Inline
Side-by-side
shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala
View file @
83af6688
...
...
@@ -18,11 +18,14 @@ import htsjdk.variant.vcf.VCFFileReader
import
nl.lumc.sasc.biopet.core.summary.
{
Summarizable
,
SummaryQScript
}
import
nl.lumc.sasc.biopet.core.
{
PipelineCommand
,
Reference
}
import
nl.lumc.sasc.biopet.extensions.Pysvtools
import
nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCallingReport.histogramBinBoundaries
import
nl.lumc.sasc.biopet.pipelines.shiva.svcallers._
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.
{
BamUtils
,
Logging
}
import
org.broadinstitute.gatk.queue.QScript
import
scala.collection.JavaConversions._
/**
* Common trait for ShivaVariantcalling
*
...
...
@@ -98,7 +101,7 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri
// sample tagging is however not available within this pipeline
for
((
sample
,
mergedResultFile
)
<-
outputMergedVCFbySample
)
{
lazy
val
counts
=
getVariantCounts
(
mergedResultFile
,
ShivaSvCallingReport
.
histogramBinBoundaries
)
lazy
val
counts
=
getVariantCounts
(
mergedResultFile
)
addSummarizable
(
new
Summarizable
{
def
summaryFiles
=
Map
(
"output_vcf"
->
mergedResultFile
)
def
summaryStats
=
counts
...
...
@@ -112,35 +115,35 @@ class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScri
protected
def
callersList
:
List
[
SvCaller
]
=
List
(
new
Breakdancer
(
this
),
new
Clever
(
this
),
new
Delly
(
this
),
new
Pindel
(
this
))
/** Settings for the summary */
def
summarySettings
=
Map
(
"sv_callers"
->
configCallers
.
toList
,
"hist_bin_boundaries"
->
ShivaSvCallingReport
.
histogramBinBoundaries
)
def
summarySettings
=
Map
(
"sv_callers"
->
configCallers
.
toList
,
"hist_bin_boundaries"
->
histogramBinBoundaries
)
/** Files for the summary */
def
summaryFiles
:
Map
[
String
,
File
]
=
if
(
inputBams
.
size
>
1
)
Map
(
"final_mergedvcf"
->
outputMergedVCF
)
else
Map
.
empty
def
getVariantCounts
(
vcfFile
:
File
,
breaks
:
Array
[
Int
])
:
Map
[
String
,
Any
]
=
{
val
delCounts
,
insCounts
,
dupCounts
,
invCounts
=
Array
.
fill
(
breaks
.
size
+
1
)
{
0
}
/** Parses a vcf-file and counts sv-s by type and size. Sv-s are divided to different size classes, the boundaries between these classes are those given in ShivaSvCallingReport.histogramBinBoundaries. */
def
getVariantCounts
(
vcfFile
:
File
)
:
Map
[
String
,
Any
]
=
{
val
delCounts
,
insCounts
,
dupCounts
,
invCounts
=
Array
.
fill
(
histogramBinBoundaries
.
size
+
1
)
{
0
}
var
traCount
=
0
val
iterator
=
new
VCFFileReader
(
vcfFile
,
false
).
iterator
while
(
iterator
.
hasNext
)
{
val
record
=
iterator
.
next
val
svType
=
record
.
getAttributeAsString
(
"SVTYPE"
,
""
)
if
(
svType
==
"TRA"
||
svType
==
"CTX"
||
svType
==
"ITX"
)
{
traCount
+=
1
}
else
{
val
size
=
record
.
getEnd
-
record
.
getStart
var
i
=
0
while
(
i
<
breaks
.
size
&&
size
>
breaks
(
i
))
i
+=
1
svType
match
{
case
"DEL"
=>
delCounts
(
i
)
+=
1
case
"INS"
=>
insCounts
(
i
)
+=
1
case
"DUP"
=>
dupCounts
(
i
)
+=
1
case
"INV"
=>
invCounts
(
i
)
+=
1
case
_
=>
logger
.
warn
(
s
"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType"
)
val
reader
=
new
VCFFileReader
(
vcfFile
,
false
)
for
(
record
<-
reader
)
{
record
.
getAttributeAsString
(
"SVTYPE"
,
""
)
match
{
case
"TRA"
|
"CTX"
|
"ITX"
=>
traCount
+=
1
case
svType
=>
{
val
size
=
record
.
getEnd
-
record
.
getStart
var
i
=
0
while
(
i
<
histogramBinBoundaries
.
size
&&
size
>
histogramBinBoundaries
(
i
))
i
+=
1
svType
match
{
case
"DEL"
=>
delCounts
(
i
)
+=
1
case
"INS"
=>
insCounts
(
i
)
+=
1
case
"DUP"
=>
dupCounts
(
i
)
+=
1
case
"INV"
=>
invCounts
(
i
)
+=
1
case
_
=>
logger
.
warn
(
s
"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType"
)
}
}
}
}
iterato
r
.
close
()
reade
r
.
close
()
Map
(
"DEL"
->
delCounts
,
"INS"
->
insCounts
,
"DUP"
->
dupCounts
,
"INV"
->
invCounts
,
"TRA"
->
traCount
)
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment