Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
c9ef89f2
Commit
c9ef89f2
authored
Mar 16, 2015
by
Peter van 't Hof
Browse files
Improve thread binning
parent
265931b8
Changes
1
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala
View file @
c9ef89f2
...
...
@@ -259,16 +259,18 @@ object VcfStats extends ToolCommand {
reader
.
close
()
val
adInfoTags
=
(
for
(
infoTag
<-
commandArgs
.
infoTags
if
!
defaultInfoFields
.
exists
(
_
==
infoTag
)
)
yield
{
require
(
header
.
getInfoHeaderLine
(
infoTag
)
!=
null
,
"Info tag '"
+
infoTag
+
"' not found in header of vcf file"
)
infoTag
})
:::
(
for
(
line
<-
header
.
getInfoHeaderLines
if
commandArgs
.
allInfoTags
if
!
defaultInfoFields
.
exists
(
_
==
line
.
getID
)
if
!
commandArgs
.
infoTags
.
exists
(
_
==
line
.
getID
)
)
yield
{
line
.
getID
}).
toList
:::
defaultInfoFields
val
adInfoTags
=
{
(
for
(
infoTag
<-
commandArgs
.
infoTags
if
!
defaultInfoFields
.
exists
(
_
==
infoTag
)
)
yield
{
require
(
header
.
getInfoHeaderLine
(
infoTag
)
!=
null
,
"Info tag '"
+
infoTag
+
"' not found in header of vcf file"
)
infoTag
})
:::
(
for
(
line
<-
header
.
getInfoHeaderLines
if
commandArgs
.
allInfoTags
if
!
defaultInfoFields
.
exists
(
_
==
line
.
getID
)
if
!
commandArgs
.
infoTags
.
exists
(
_
==
line
.
getID
)
)
yield
{
line
.
getID
}).
toList
:::
defaultInfoFields
}
val
adGenotypeTags
=
(
for
(
genotypeTag
<-
commandArgs
.
genotypeTags
if
!
defaultGenotypeFields
.
exists
(
_
==
genotypeTag
)
...
...
@@ -324,45 +326,46 @@ object VcfStats extends ToolCommand {
logger
.
info
(
"total: "
+
variantCounter
+
" rows processed, "
+
fraction
+
"% done"
)
}
val
chr
I
ntervals
=
intervals
.
group
By
(
_
.
getSequence
)
val
chr
Stats
=
for
(
(
chr
,
interval
s
)
<-
chrI
ntervals
)
yield
{
val
binStats
=
for
(
interval
<-
intervals
.
par
)
yield
{
val
reader
=
new
VCFFileReader
(
commandArgs
.
inputFile
,
true
)
var
chunkCounter
=
0
val
stats
=
createStats
logger
.
info
(
"Starting on: "
+
interval
)
for
(
record
<-
reader
.
query
(
interval
.
getSequence
,
interval
.
getStart
,
interval
.
getEnd
)
if
record
.
getStart
<=
interval
.
getEnd
)
{
mergeNestedStatsMap
(
stats
.
generalStats
,
checkGeneral
(
record
,
adInfoTags
))
for
(
sample1
<-
samples
)
yield
{
val
genotype
=
record
.
getGenotype
(
sample1
)
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample1
).
genotypeStats
,
checkGenotype
(
record
,
genotype
,
adGenotypeTags
))
for
(
sample2
<-
sample
s
)
{
val
genotype
2
=
record
.
getGenotype
(
samp
le
2
)
if
(
genotype
.
getAlleles
==
genotype2
.
getAlleles
)
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
genotyp
eOverlap
+=
1
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
alleleOverlap
+=
alleleOverlap
(
genotype
.
getAlleles
.
toList
,
genotype2
.
getAlleles
.
toList
)
val
chr
Stats
=
for
(
i
ntervals
<-
intervals
.
group
ed
(
intervals
.
size
/
10
).
toList
.
par
)
yield
{
val
chunkStats
=
for
(
intervals
<-
intervals
.
grouped
(
10
))
yield
{
val
bin
Stats
=
for
(
interval
<-
i
ntervals
.
par
)
yield
{
val
reader
=
new
VCFFileReader
(
commandArgs
.
inputFile
,
true
)
var
chunkCounter
=
0
val
stats
=
createStats
logger
.
info
(
"Starting on: "
+
interval
)
for
(
record
<-
reader
.
query
(
interval
.
getSequence
,
interval
.
getStart
,
interval
.
getEnd
)
if
record
.
getStart
<=
interval
.
getEnd
)
{
mergeNestedStatsMap
(
stats
.
generalStats
,
checkGeneral
(
record
,
adInfoTags
))
for
(
sample1
<-
samples
)
yield
{
val
genotype
=
record
.
getGenotype
(
sample1
)
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample1
).
genotypeStats
,
checkGenotype
(
record
,
genotype
,
adGenotypeTags
)
)
for
(
sample2
<-
samples
)
{
val
genotype2
=
record
.
getGenotype
(
sample
2
)
if
(
genotype
.
getAlleles
==
genotype2
.
getAlle
le
s
)
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
genotypeOverlap
+=
1
stats
.
samplesStats
(
sample1
).
sampleToSample
(
sample2
).
allel
eOverlap
+=
alleleOverlap
(
genotype
.
getAlleles
.
toList
,
genotype2
.
getAlleles
.
toList
)
}
}
chunkCounter
+=
1
}
chunkCounter
+=
1
}
reader
.
close
()
reader
.
close
()
if
(
commandArgs
.
writeBinStats
)
{
val
binOutputDir
=
new
File
(
commandArgs
.
outputDir
,
"bins"
+
File
.
separator
+
interval
.
getSequence
)
if
(
commandArgs
.
writeBinStats
)
{
val
binOutputDir
=
new
File
(
commandArgs
.
outputDir
,
"bins"
+
File
.
separator
+
interval
.
getSequence
)
writeGenotypeField
(
stats
,
samples
,
"general"
,
binOutputDir
,
prefix
=
"genotype-"
+
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
writeField
(
stats
,
"general"
,
binOutputDir
,
prefix
=
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
}
writeGenotypeField
(
stats
,
samples
,
"general"
,
binOutputDir
,
prefix
=
"genotype-"
+
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
writeField
(
stats
,
"general"
,
binOutputDir
,
prefix
=
interval
.
getStart
+
"-"
+
interval
.
getEnd
)
}
status
(
chunkCounter
,
interval
)
stats
status
(
chunkCounter
,
interval
)
stats
}
binStats
.
toList
.
fold
(
createStats
)(
_
+=
_
)
}
bin
Stats
.
toList
.
fold
(
createStats
)(
_
+=
_
)
chunk
Stats
.
toList
.
fold
(
createStats
)(
_
+=
_
)
}
val
stats
=
chrStats
.
toList
.
fold
(
createStats
)(
_
+=
_
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment