Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
biopet.biopet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
biopet.biopet
Commits
f913516e
Commit
f913516e
authored
Feb 05, 2015
by
Peter van 't Hof
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added scala docs
parent
e5e7978f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
116 additions
and
24 deletions
+116
-24
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala
...k/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala
+116
-24
No files found.
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala
View file @
f913516e
...
...
@@ -21,14 +21,15 @@ class VcfStats(val root: Configurable) extends BiopetJavaCommandLineFunction {
@Input
(
doc
=
"Input fastq"
,
shortName
=
"I"
,
required
=
true
)
var
input
:
File
=
_
@Output
(
doc
=
"Output fastq"
,
shortName
=
"o"
,
required
=
true
)
protected
var
generalTsv
:
File
=
_
protected
var
outputDir
:
String
=
_
/**
* Set output dir and a output file
* @param dir
*/
def
setOutputDir
(
dir
:
String
)
:
Unit
=
{
outputDir
=
dir
generalTsv
=
new
File
(
dir
+
File
.
separator
+
"genotype_general.tsv
"
)
this
.
jobOutputFile
=
new
File
(
dir
+
File
.
separator
+
".vcfstats.out
"
)
}
/**
...
...
@@ -41,8 +42,10 @@ class VcfStats(val root: Configurable) extends BiopetJavaCommandLineFunction {
}
object
VcfStats
extends
ToolCommand
{
/** Commandline argument */
case
class
Args
(
inputFile
:
File
=
null
,
outputDir
:
String
=
null
,
intervals
:
Option
[
File
]
=
None
)
extends
AbstractArgs
/** Parsing commandline arguments */
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'I'
,
"inputFile"
)
required
()
unbounded
()
valueName
(
"<file>"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
inputFile
=
x
)
...
...
@@ -58,20 +61,30 @@ object VcfStats extends ToolCommand {
*/
}
/** Class to store sample to sample compare stats */
class
SampleToSampleStats
{
/** Number of genotypes match with other sample */
var
genotypeOverlap
:
Int
=
0
/** Number of alleles also found in other sample */
var
alleleOverlap
:
Int
=
0
/** Add an other class */
def
+=
(
other
:
SampleToSampleStats
)
{
this
.
genotypeOverlap
+=
other
.
genotypeOverlap
this
.
alleleOverlap
+=
other
.
alleleOverlap
}
}
/** class to store all sample relative stats */
class
SampleStats
{
/** Stores all genotype relative stats */
val
genotypeStats
:
mutable.Map
[
String
,
mutable.Map
[
Any
,
Int
]]
=
mutable
.
Map
()
/** Stores sample to sample compare stats */
val
sampleToSample
:
mutable.Map
[
String
,
SampleToSampleStats
]
=
mutable
.
Map
()
/** Add an other class */
def
+=
(
other
:
SampleStats
)
:
Unit
=
{
for
((
key
,
value
)
<-
other
.
sampleToSample
)
{
if
(
this
.
sampleToSample
.
contains
(
key
))
this
.
sampleToSample
(
key
)
+=
value
...
...
@@ -86,9 +99,13 @@ object VcfStats extends ToolCommand {
}
class
Stats
{
/** Stores are general stats */
val
generalStats
:
mutable.Map
[
String
,
mutable.Map
[
Any
,
Int
]]
=
mutable
.
Map
()
/** Stores all sample/genotype specific stats */
val
samplesStats
:
mutable.Map
[
String
,
SampleStats
]
=
mutable
.
Map
()
/** Add an other class */
def
+=
(
other
:
Stats
)
:
Stats
=
{
for
((
key
,
value
)
<-
other
.
samplesStats
)
{
if
(
this
.
samplesStats
.
contains
(
key
))
this
.
samplesStats
(
key
)
+=
value
...
...
@@ -103,11 +120,21 @@ object VcfStats extends ToolCommand {
}
}
/**
* Merge m2 into m1
* @param m1
* @param m2
*/
def
mergeStatsMap
(
m1
:
mutable.Map
[
Any
,
Int
],
m2
:
mutable.Map
[
Any
,
Int
])
:
Unit
=
{
for
(
key
<-
m2
.
keySet
)
m1
(
key
)
=
m1
.
getOrElse
(
key
,
0
)
+
m2
(
key
)
}
/**
* Merge m2 into m1
* @param m1
* @param m2
*/
def
mergeNestedStatsMap
(
m1
:
mutable.Map
[
String
,
mutable.Map
[
Any
,
Int
]],
m2
:
Map
[
String
,
Map
[
Any
,
Int
]])
:
Unit
=
{
for
((
field
,
fieldMap
)
<-
m2
)
{
if
(
m1
.
contains
(
field
))
{
...
...
@@ -119,7 +146,7 @@ object VcfStats extends ToolCommand {
}
}
var
commandArgs
:
Args
=
_
protected
var
commandArgs
:
Args
=
_
/**
* @param args the command line arguments
...
...
@@ -205,7 +232,7 @@ object VcfStats extends ToolCommand {
//logger.info(counter + " variants done")
logger
.
info
(
"Done reading vcf records"
)
plot
Xy
(
writeField
(
"QUAL"
,
stats
.
generalStats
.
getOrElse
(
"QUAL"
,
mutable
.
Map
())))
plot
Line
(
writeField
(
"QUAL"
,
stats
.
generalStats
.
getOrElse
(
"QUAL"
,
mutable
.
Map
())))
writeGenotypeFields
(
stats
,
commandArgs
.
outputDir
+
"/genotype_"
,
samples
)
writeOverlap
(
stats
,
_
.
genotypeOverlap
,
commandArgs
.
outputDir
+
"/sample_compare/genotype_overlap"
,
samples
)
writeOverlap
(
stats
,
_
.
alleleOverlap
,
commandArgs
.
outputDir
+
"/sample_compare/allele_overlap"
,
samples
)
...
...
@@ -213,12 +240,28 @@ object VcfStats extends ToolCommand {
logger
.
info
(
"Done"
)
}
def
checkGeneral
(
record
:
VariantContext
)
:
Map
[
String
,
Map
[
Any
,
Int
]]
=
{
val
qual
=
record
.
getPhredScaledQual
Map
(
"QUAL"
->
Map
(
qual
->
1
))
/**
* Function to check all general stats, all info expect sample/genotype specific stats
* @param record
* @return
*/
protected
def
checkGeneral
(
record
:
VariantContext
)
:
Map
[
String
,
Map
[
Any
,
Int
]]
=
{
val
buffer
=
mutable
.
Map
[
String
,
Map
[
Any
,
Int
]]()
buffer
+=
"qual"
->
Map
(
record
.
getPhredScaledQual
->
1
)
//TODO: more general stats
buffer
.
toMap
}
def
checkGenotype
(
record
:
VariantContext
,
genotype
:
Genotype
)
:
Map
[
String
,
Map
[
Any
,
Int
]]
=
{
/**
* Function to check sample/genotype specific stats
* @param record
* @param genotype
* @return
*/
protected
def
checkGenotype
(
record
:
VariantContext
,
genotype
:
Genotype
)
:
Map
[
String
,
Map
[
Any
,
Int
]]
=
{
val
buffer
=
mutable
.
Map
[
String
,
Map
[
Any
,
Int
]]()
buffer
+=
"DP"
->
Map
((
if
(
genotype
.
hasDP
)
genotype
.
getDP
else
"not set"
)
->
1
)
...
...
@@ -258,24 +301,47 @@ object VcfStats extends ToolCommand {
buffer
.
toMap
}
def
writeGenotypeFields
(
stats
:
Stats
,
prefix
:
String
,
samples
:
List
[
String
])
{
/**
* Function to write stats to tsv files
* @param stats
* @param prefix
* @param samples
*/
protected
def
writeGenotypeFields
(
stats
:
Stats
,
prefix
:
String
,
samples
:
List
[
String
])
{
val
fields
=
List
(
"DP"
,
"GQ"
,
"AD"
,
"AD-ref"
,
"AD-alt"
,
"AD-used"
,
"AD-not_used"
,
"general"
)
for
(
field
<-
fields
)
{
val
file
=
new
File
(
prefix
+
field
+
".tsv"
)
file
.
getParentFile
.
mkdirs
()
val
writer
=
new
PrintWriter
(
file
)
writer
.
println
(
samples
.
mkString
(
field
+
"\t"
,
"\t"
,
""
))
val
keySet
=
(
for
(
sample
<-
samples
)
yield
stats
.
samplesStats
(
sample
).
genotypeStats
.
getOrElse
(
field
,
Map
[
Any
,
Int
]()).
keySet
).
fold
(
Set
[
Any
]())(
_
++
_
)
for
(
key
<-
keySet
.
toList
.
sortWith
(
sortAnyAny
(
_
,
_
)))
{
val
values
=
for
(
sample
<-
samples
)
yield
stats
.
samplesStats
(
sample
).
genotypeStats
.
getOrElse
(
field
,
Map
[
Any
,
Int
]()).
getOrElse
(
key
,
0
)
writer
.
println
(
values
.
mkString
(
key
+
"\t"
,
"\t"
,
""
))
}
writer
.
close
()
plotXy
(
file
)
writeGenotypeField
(
stats
,
prefix
,
samples
,
field
)
}
}
/**
* Function to write 1 specific genotype field
* @param stats
* @param prefix
* @param samples
* @param field
*/
protected
def
writeGenotypeField
(
stats
:
Stats
,
prefix
:
String
,
samples
:
List
[
String
],
field
:
String
)
:
Unit
=
{
val
file
=
new
File
(
prefix
+
field
+
".tsv"
)
file
.
getParentFile
.
mkdirs
()
val
writer
=
new
PrintWriter
(
file
)
writer
.
println
(
samples
.
mkString
(
field
+
"\t"
,
"\t"
,
""
))
val
keySet
=
(
for
(
sample
<-
samples
)
yield
stats
.
samplesStats
(
sample
).
genotypeStats
.
getOrElse
(
field
,
Map
[
Any
,
Int
]()).
keySet
).
fold
(
Set
[
Any
]())(
_
++
_
)
for
(
key
<-
keySet
.
toList
.
sortWith
(
sortAnyAny
(
_
,
_
)))
{
val
values
=
for
(
sample
<-
samples
)
yield
stats
.
samplesStats
(
sample
).
genotypeStats
.
getOrElse
(
field
,
Map
[
Any
,
Int
]()).
getOrElse
(
key
,
0
)
writer
.
println
(
values
.
mkString
(
key
+
"\t"
,
"\t"
,
""
))
}
writer
.
close
()
plotLine
(
file
)
}
def
writeField
(
prefix
:
String
,
data
:
mutable.Map
[
Any
,
Int
])
:
File
=
{
/**
* Function to write 1 specific general field
* @param prefix
* @param data
* @return
*/
protected
def
writeField
(
prefix
:
String
,
data
:
mutable.Map
[
Any
,
Int
])
:
File
=
{
val
file
=
new
File
(
commandArgs
.
outputDir
+
"/"
+
prefix
+
".tsv"
)
println
(
file
)
file
.
getParentFile
.
mkdirs
()
...
...
@@ -288,6 +354,12 @@ object VcfStats extends ToolCommand {
file
}
/**
* Function to sort Any values
* @param a
* @param b
* @return
*/
def
sortAnyAny
(
a
:
Any
,
b
:
Any
)
:
Boolean
=
{
a
match
{
case
ai
:
Int
=>
{
...
...
@@ -301,6 +373,13 @@ object VcfStats extends ToolCommand {
}
}
/**
* Function to write sample to sample compare tsv's / heatmaps
* @param stats
* @param function function to extract targeted var in SampleToSampleStats
* @param prefix
* @param samples
*/
def
writeOverlap
(
stats
:
Stats
,
function
:
SampleToSampleStats
=>
Int
,
prefix
:
String
,
samples
:
List
[
String
])
:
Unit
=
{
val
absFile
=
new
File
(
prefix
+
".abs.tsv"
)
...
...
@@ -327,6 +406,10 @@ object VcfStats extends ToolCommand {
plotHeatmap
(
relFile
)
}
/**
* Plots heatmaps on target tsv file
* @param file
*/
def
plotHeatmap
(
file
:
File
)
{
executeRscript
(
"plotHeatmap.R"
,
Array
(
file
.
getAbsolutePath
,
file
.
getAbsolutePath
.
stripSuffix
(
".tsv"
)
+
".heatmap.png"
,
...
...
@@ -334,11 +417,20 @@ object VcfStats extends ToolCommand {
file
.
getAbsolutePath
.
stripSuffix
(
".tsv"
)
+
".heatmap.dendrogram.png"
))
}
def
plotXy
(
file
:
File
)
{
/**
* Plots line graph with target tsv file
* @param file
*/
def
plotLine
(
file
:
File
)
{
executeRscript
(
"plotXY.R"
,
Array
(
file
.
getAbsolutePath
,
file
.
getAbsolutePath
.
stripSuffix
(
".tsv"
)
+
".xy.png"
))
}
/**
* Function to execute Rscript as subproces
* @param resource
* @param args
*/
def
executeRscript
(
resource
:
String
,
args
:
Array
[
String
])
:
Unit
=
{
val
is
=
getClass
.
getResourceAsStream
(
resource
)
val
file
=
File
.
createTempFile
(
"script."
,
"."
+
resource
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment