Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
265931b8
Commit
265931b8
authored
Mar 16, 2015
by
Peter van 't Hof
Browse files
Stats per bin per chr
parent
cbf4c982
Changes
1
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala
View file @
265931b8
...
...
@@ -324,40 +324,48 @@ object VcfStats extends ToolCommand {
logger
.
info
(
"total: "
+
variantCounter
+
" rows processed, "
+
fraction
+
"% done"
)
}
val
stats
=
(
for
(
interval
<-
intervals
.
par
)
yield
{
val
reader
=
new
VCFFileReader
(
commandArgs
.
inputFile
,
true
)
var
chunkCounter
=
0
val
stats
=
createStats
logger
.
info
(
"Starting on: "
+
interval
)
for
(
record
<-
reader
.
query
(
interval
.
getSequence
,
interval
.
getStart
,
interval
.
getEnd
)
if
record
.
getStart
<=
interval
.
getEnd
)
{
mergeNestedStatsMap
(
stats
.
generalStats
,
checkGeneral
(
record
,
adInfoTags
))
for
(
sample1
<-
samples
)
yield
{
val
genotype
=
record
.
getGenotype
(
sample1
)
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample1
).
genotypeStats
,
checkGenotype
(
record
,
genotype
,
adGenotypeTags
))
for
(
sample2
<-
samples
)
{
val
genotype2
=
record
.
getGenotype
(
sample2
)
if
(
genotype
.
getAlleles
==
genotype2
.
getAlleles
)
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
genotypeOverlap
+=
1
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
alleleOverlap
+=
alleleOverlap
(
genotype
.
getAlleles
.
toList
,
genotype2
.
getAlleles
.
toList
)
val
chrIntervals
=
intervals
.
groupBy
(
_
.
getSequence
)
val
chrStats
=
for
((
chr
,
intervals
)
<-
chrIntervals
)
yield
{
val
binStats
=
for
(
interval
<-
intervals
.
par
)
yield
{
val
reader
=
new
VCFFileReader
(
commandArgs
.
inputFile
,
true
)
var
chunkCounter
=
0
val
stats
=
createStats
logger
.
info
(
"Starting on: "
+
interval
)
for
(
record
<-
reader
.
query
(
interval
.
getSequence
,
interval
.
getStart
,
interval
.
getEnd
)
if
record
.
getStart
<=
interval
.
getEnd
)
{
mergeNestedStatsMap
(
stats
.
generalStats
,
checkGeneral
(
record
,
adInfoTags
))
for
(
sample1
<-
samples
)
yield
{
val
genotype
=
record
.
getGenotype
(
sample1
)
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample1
).
genotypeStats
,
checkGenotype
(
record
,
genotype
,
adGenotypeTags
))
for
(
sample2
<-
samples
)
{
val
genotype2
=
record
.
getGenotype
(
sample2
)
if
(
genotype
.
getAlleles
==
genotype2
.
getAlleles
)
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
genotypeOverlap
+=
1
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
alleleOverlap
+=
alleleOverlap
(
genotype
.
getAlleles
.
toList
,
genotype2
.
getAlleles
.
toList
)
}
}
chunkCounter
+=
1
}
chunkCounter
+=
1
}
reader
.
close
()
reader
.
close
()
if
(
commandArgs
.
writeBinStats
)
{
val
binOutputDir
=
new
File
(
commandArgs
.
outputDir
,
"bins"
+
File
.
separator
+
interval
.
getSequence
)
if
(
commandArgs
.
writeBinStats
)
{
val
binOutputDir
=
new
File
(
commandArgs
.
outputDir
,
"bins"
+
File
.
separator
+
interval
.
getSequence
)
writeGenotypeField
(
stats
,
samples
,
"general"
,
binOutputDir
,
prefix
=
"genotype-"
+
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
writeField
(
stats
,
"general"
,
binOutputDir
,
prefix
=
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
}
writeGenotypeField
(
stats
,
samples
,
"general"
,
binOutputDir
,
prefix
=
"genotype-"
+
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
writeField
(
stats
,
"general"
,
binOutputDir
,
prefix
=
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
status
(
chunkCounter
,
interval
)
stats
}
binStats
.
toList
.
fold
(
createStats
)(
_
+=
_
)
}
status
(
chunkCounter
,
interval
)
stats
}).
toList
.
fold
(
createStats
)(
_
+=
_
)
val
stats
=
chrStats
.
toList
.
fold
(
createStats
)(
_
+=
_
)
logger
.
info
(
"Done reading vcf records"
)
...
...
@@ -382,7 +390,13 @@ object VcfStats extends ToolCommand {
}
for
(
field
<-
commandArgs
.
generalWiggle
)
{
writeWiggle
(
intervals
,
field
,
"count"
,
new
File
(
commandArgs
.
outputDir
,
field
+
".wig"
))
val
file
=
new
File
(
commandArgs
.
outputDir
,
"wigs"
+
File
.
separator
+
"general-"
+
field
+
".wig"
)
writeWiggle
(
intervals
,
field
,
"count"
,
file
,
false
)
}
for
(
field
<-
commandArgs
.
genotypeWiggle
;
sample
<-
samples
)
{
val
file
=
new
File
(
commandArgs
.
outputDir
,
"wigs"
+
File
.
separator
+
"genotype-"
+
sample
+
"-"
+
field
+
".wig"
)
writeWiggle
(
intervals
,
field
,
sample
,
file
,
true
)
}
writeOverlap
(
stats
,
_
.
genotypeOverlap
,
commandArgs
.
outputDir
+
"/sample_compare/genotype_overlap"
,
samples
)
...
...
@@ -391,16 +405,20 @@ object VcfStats extends ToolCommand {
logger
.
info
(
"Done"
)
}
protected
def
writeWiggle
(
intervals
:
List
[
Interval
],
row
:
String
,
column
:
String
,
outputFile
:
File
)
:
Unit
=
{
protected
def
writeWiggle
(
intervals
:
List
[
Interval
],
row
:
String
,
column
:
String
,
outputFile
:
File
,
genotype
:
Boolean
)
:
Unit
=
{
val
bla
=
intervals
.
groupBy
(
_
.
getSequence
).
map
{
case
(
k
,
v
)
=>
k
->
v
.
sortBy
(
_
.
getStart
)
}
outputFile
.
getParentFile
.
mkdirs
()
val
writer
=
new
PrintWriter
(
outputFile
)
writer
.
println
(
"track type=wiggle_0"
)
for
((
chr
,
intervals
)
<-
bla
)
yield
{
val
length
=
intervals
.
head
.
length
()
writer
.
println
(
s
"fixedStep chrom=$chr start=1 step=$length span=$length"
)
for
(
interval
<-
intervals
)
{
val
file
=
new
File
(
commandArgs
.
outputDir
,
"bins"
+
File
.
separator
+
chr
+
File
.
separator
+
interval
.
getStart
+
"-"
+
interval
.
getEnd
+
"-general.tsv"
)
valueFromTsv
(
file
,
row
,
column
).
foreach
(
writer
.
println
(
_
))
val
file
=
{
if
(
genotype
)
new
File
(
commandArgs
.
outputDir
,
"bins"
+
File
.
separator
+
chr
+
File
.
separator
+
"genotype-"
+
interval
.
getStart
+
"-"
+
interval
.
getEnd
+
"-general.tsv"
)
else
new
File
(
commandArgs
.
outputDir
,
"bins"
+
File
.
separator
+
chr
+
File
.
separator
+
interval
.
getStart
+
"-"
+
interval
.
getEnd
+
"-general.tsv"
)
}
writer
.
println
(
valueFromTsv
(
file
,
row
,
column
).
getOrElse
(
0
))
}
}
writer
.
close
()
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment