Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
6aefe25a
Commit
6aefe25a
authored
Dec 20, 2016
by
Peter van 't Hof
Browse files
Moving and splitting package
parent
e17daab5
Changes
7
Hide whitespace changes
Inline
Side-by-side
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala
View file @
6aefe25a
...
...
@@ -16,11 +16,12 @@ package nl.lumc.sasc.biopet.extensions.tools
import
java.io.File
import
nl.lumc.sasc.biopet.core.summary.
{
Summarizable
,
SummaryQScript
}
import
nl.lumc.sasc.biopet.core.
{
Reference
,
ToolCommandFunction
}
import
nl.lumc.sasc.biopet.core.summary.
{
Summarizable
,
SummaryQScript
}
import
nl.lumc.sasc.biopet.core.
{
Reference
,
ToolCommandFunction
}
import
nl.lumc.sasc.biopet.tools.vcfstats.VcfStats
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.tryToParseNumber
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
scala.io.Source
...
...
@@ -30,7 +31,7 @@ import scala.io.Source
* Created by pjvan_thof on 1/10/15.
*/
class
VcfStats
(
val
root
:
Configurable
)
extends
ToolCommandFunction
with
Summarizable
with
Reference
{
def
toolObject
=
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VcfStats
def
toolObject
=
VcfStats
mainFunction
=
false
...
...
biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala
View file @
6aefe25a
...
...
@@ -14,7 +14,8 @@
*/
package
nl.lumc.sasc.biopet
import
nl.lumc.sasc.biopet.utils.
{
BiopetExecutable
,
MainCommand
}
import
nl.lumc.sasc.biopet.tools.vcfstats.VcfStats
import
nl.lumc.sasc.biopet.utils.
{
BiopetExecutable
,
MainCommand
}
object
BiopetToolsExecutable
extends
BiopetExecutable
{
...
...
@@ -47,7 +48,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl
.
lumc
.
sasc
.
biopet
.
tools
.
ValidateFastq
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
ValidateVcf
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VcfFilter
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VcfStats
,
VcfStats
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VcfToTsv
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VcfWithVcf
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VepNormalizer
,
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/vcfstats/SampleStats.scala
0 → 100644
View file @
6aefe25a
package
nl.lumc.sasc.biopet.tools.vcfstats
import
scala.collection.mutable
/**
 * Holds every statistic that belongs to a single sample.
 *
 * Both constructor arguments are mutable maps; `+=` updates them in place.
 *
 * @param genotypeStats  Genotype related counts as a three-level nested map; the merge code
 *                       calls the outer key `chr` and the second key `field`, and the
 *                       innermost map holds integer counts per value
 * @param sampleToSample Sample to sample comparison counters, keyed by a string
 */
case class SampleStats(
    genotypeStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
    sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) {

  /**
   * Merges the content of `other` into this instance.
   *
   * Entries that exist on both sides are summed. Entries that only exist in `other` are
   * adopted by reference (not copied), so `other` should not be mutated afterwards.
   *
   * @param other stats to add to this instance
   */
  def +=(other: SampleStats): Unit = {
    for ((key, value) <- other.sampleToSample) {
      this.sampleToSample.get(key) match {
        case Some(existing) => existing += value
        case None           => this.sampleToSample(key) = value
      }
    }

    for ((chr, chrMap) <- other.genotypeStats; (field, fieldMap) <- chrMap) {
      // Create the per-chr map lazily, only when there is at least one field to store.
      val fields =
        this.genotypeStats.getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
      fields.get(field) match {
        // The merge helper is a member of the `Stats` companion object, so it has to be
        // qualified here; an unqualified `mergeStatsMap` is not in scope in this file.
        case Some(existing) => Stats.mergeStatsMap(existing, fieldMap)
        case None           => fields(field) = fieldMap
      }
    }
  }
}
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/vcfstats/SampleToSampleStats.scala
0 → 100644
View file @
6aefe25a
package
nl.lumc.sasc.biopet.tools.vcfstats
/**
 * Mutable pair of counters describing how one sample compares to another sample.
 *
 * @param genotypeOverlap Number of genotypes that match the other sample
 * @param alleleOverlap   Number of alleles also found in the other sample
 */
case class SampleToSampleStats(var genotypeOverlap: Int = 0, var alleleOverlap: Int = 0) {

  /**
   * Adds the counters of `other` to this instance, in place.
   *
   * Declared with an explicit `: Unit =` rather than procedure syntax, which is deprecated
   * in Scala 2.13 and no longer accepted by Scala 3.
   *
   * @param other counters to add; only read, never modified
   */
  def +=(other: SampleToSampleStats): Unit = {
    this.genotypeOverlap += other.genotypeOverlap
    this.alleleOverlap += other.alleleOverlap
  }
}
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/vcfstats/Stats.scala
0 → 100644
View file @
6aefe25a
package
nl.lumc.sasc.biopet.tools.vcfstats
import
scala.collection.mutable
/**
 * Top-level container for the statistics collected from a vcf file.
 *
 * Both constructor arguments are mutable maps; `+=` updates them in place.
 *
 * @param generalStats Stores all general stats as a three-level nested map ending in
 *                     integer counts
 * @param samplesStats Stores all sample/genotype specific stats, one [[SampleStats]] per key
 */
case class Stats(
    generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
    samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {

  /**
   * Merges `other` into this instance and returns this instance.
   *
   * Entries present on both sides are summed; entries only present in `other` are stored
   * by reference, not copied.
   *
   * @param other stats to add
   * @return this instance, after the merge
   */
  def +=(other: Stats): Stats = {
    other.samplesStats.foreach {
      case (sample, incoming) =>
        samplesStats.get(sample) match {
          case Some(present) => present += incoming
          case None          => samplesStats(sample) = incoming
        }
    }

    for {
      (contig, perField) <- other.generalStats
      (fieldName, counts) <- perField
    } {
      // The outer entry is only created once there is a field to put into it.
      val target =
        generalStats.getOrElseUpdate(contig, mutable.Map[String, mutable.Map[Any, Int]]())
      target.get(fieldName) match {
        case Some(present) => Stats.mergeStatsMap(present, counts)
        case None          => target(fieldName) = counts
      }
    }

    this
  }
}
object Stats {

  /**
   * Adds every count of `m2` to the matching key of `m1`, in place.
   *
   * Keys missing from `m1` start from 0. `m2` is only read.
   *
   * @param m1 map that receives the summed counts
   * @param m2 counts to add
   */
  def mergeStatsMap(m1: mutable.Map[Any, Int], m2: mutable.Map[Any, Int]): Unit = {
    m2.foreach {
      case (key, count) => m1.update(key, m1.getOrElse(key, 0) + count)
    }
  }

  /**
   * Adds every count of the immutable nested map `m2` to the mutable nested map `m1`,
   * in place.
   *
   * Missing outer or middle levels in `m1` are created as fresh mutable maps filled with
   * the entries of `m2`.
   *
   * @param m1 nested mutable map that receives the summed counts
   * @param m2 nested immutable map holding the counts to add
   */
  def mergeNestedStatsMap(
      m1: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]],
      m2: Map[String, Map[String, Map[Any, Int]]]): Unit = {
    m2.foreach {
      case (chr, incomingFields) =>
        incomingFields.foreach {
          case (field, incomingCounts) =>
            m1.get(chr) match {
              case None =>
                m1(chr) = mutable.Map(field -> mutable.Map(incomingCounts.toList: _*))
              case Some(fields) =>
                fields.get(field) match {
                  case None =>
                    fields(field) = mutable.Map(incomingCounts.toList: _*)
                  case Some(counts) =>
                    incomingCounts.foreach {
                      case (key, value) => counts(key) = counts.getOrElse(key, 0) + value
                    }
                }
            }
        }
    }
  }
}
\ No newline at end of file
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfStats.scala
→
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/
vcfstats/
VcfStats.scala
View file @
6aefe25a
...
...
@@ -12,17 +12,15 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.tools
package
nl.lumc.sasc.biopet.tools
.vcfstats
import
java.io.
{
File
,
FileOutputStream
,
PrintWriter
}
import
htsjdk.samtools.reference.FastaSequenceFile
import
htsjdk.samtools.util.Interval
import
htsjdk.variant.variantcontext.
{
Allele
,
Genotype
,
VariantContext
}
import
htsjdk.variant.vcf.VCFFileReader
import
nl.lumc.sasc.biopet.utils.
{
FastaUtils
,
ToolCommand
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import
nl.lumc.sasc.biopet.utils.
{
FastaUtils
,
ToolCommand
}
import
scala.collection.JavaConversions._
import
scala.collection.mutable
...
...
@@ -110,86 +108,6 @@ object VcfStats extends ToolCommand {
|${genotypeWiggleOptions.mkString(", ")}"""
.
stripMargin
}
/**
 * Mutable pair of counters describing how one sample compares to another sample.
 *
 * @param genotypeOverlap Number of genotypes that match the other sample
 * @param alleleOverlap   Number of alleles also found in the other sample
 */
case class SampleToSampleStats(var genotypeOverlap: Int = 0, var alleleOverlap: Int = 0) {

  /**
   * Adds the counters of `other` to this instance, in place.
   *
   * Uses an explicit `: Unit =` result type; the procedure syntax used before is deprecated
   * in Scala 2.13 and no longer accepted by Scala 3.
   *
   * @param other counters to add; only read, never modified
   */
  def +=(other: SampleToSampleStats): Unit = {
    this.genotypeOverlap += other.genotypeOverlap
    this.alleleOverlap += other.alleleOverlap
  }
}
/**
 * Holds every statistic that belongs to a single sample.
 *
 * @param genotypeStats  Stores all genotype related stats as a three-level nested map
 *                       ending in integer counts
 * @param sampleToSample Stores the sample to sample comparison counters
 */
case class SampleStats(
    genotypeStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
    sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) {

  /**
   * Merges `other` into this instance, in place.
   *
   * Entries present on both sides are summed; entries only present in `other` are stored
   * by reference, not copied.
   *
   * @param other stats to add
   */
  def +=(other: SampleStats): Unit = {
    other.sampleToSample.foreach {
      case (label, incoming) =>
        sampleToSample.get(label) match {
          case Some(current) => current += incoming
          case None          => sampleToSample(label) = incoming
        }
    }

    for {
      (chr, perField) <- other.genotypeStats
      (field, incoming) <- perField
    } {
      // The outer entry is only created once there is a field to put into it.
      val mine =
        genotypeStats.getOrElseUpdate(chr, mutable.Map[String, mutable.Map[Any, Int]]())
      mine.get(field) match {
        case Some(current) => mergeStatsMap(current, incoming)
        case None          => mine(field) = incoming
      }
    }
  }
}
/**
 * Top-level container for the statistics collected from a vcf file.
 *
 * @param generalStats Stores all general stats as a three-level nested map ending in
 *                     integer counts
 * @param samplesStats Stores all sample/genotype specific stats
 */
case class Stats(
    generalStats: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]] = mutable.Map(),
    samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {

  /**
   * Merges `other` into this instance and returns this instance.
   *
   * Entries present on both sides are summed; entries only present in `other` are stored
   * by reference, not copied.
   *
   * @param other stats to add
   * @return this instance, after the merge
   */
  def +=(other: Stats): Stats = {
    other.samplesStats.foreach {
      case (sample, incoming) =>
        samplesStats.get(sample) match {
          case Some(present) => present += incoming
          case None          => samplesStats(sample) = incoming
        }
    }

    for {
      (contig, perField) <- other.generalStats
      (fieldName, counts) <- perField
    } {
      // The outer entry is only created once there is a field to put into it.
      val target =
        generalStats.getOrElseUpdate(contig, mutable.Map[String, mutable.Map[Any, Int]]())
      target.get(fieldName) match {
        case Some(present) => mergeStatsMap(present, counts)
        case None          => target(fieldName) = counts
      }
    }

    this
  }
}
/**
 * Adds every count of `m2` to the matching key of `m1`, in place.
 *
 * Keys missing from `m1` start from 0. `m2` is only read.
 *
 * @param m1 map that receives the summed counts
 * @param m2 counts to add
 */
def mergeStatsMap(m1: mutable.Map[Any, Int], m2: mutable.Map[Any, Int]): Unit = {
  m2.foreach {
    case (key, count) => m1.update(key, m1.getOrElse(key, 0) + count)
  }
}
/**
 * Adds every count of the immutable nested map `m2` to the mutable nested map `m1`,
 * in place.
 *
 * Missing outer or middle levels in `m1` are created as fresh mutable maps filled with
 * the entries of `m2`.
 *
 * @param m1 nested mutable map that receives the summed counts
 * @param m2 nested immutable map holding the counts to add
 */
def mergeNestedStatsMap(
    m1: mutable.Map[String, mutable.Map[String, mutable.Map[Any, Int]]],
    m2: Map[String, Map[String, Map[Any, Int]]]): Unit = {
  m2.foreach {
    case (chr, incomingFields) =>
      incomingFields.foreach {
        case (field, incomingCounts) =>
          m1.get(chr) match {
            case None =>
              m1(chr) = mutable.Map(field -> mutable.Map(incomingCounts.toList: _*))
            case Some(fields) =>
              fields.get(field) match {
                case None =>
                  fields(field) = mutable.Map(incomingCounts.toList: _*)
                case Some(counts) =>
                  incomingCounts.foreach {
                    case (key, value) => counts(key) = counts.getOrElse(key, 0) + value
                  }
              }
          }
      }
  }
}
// Holder for the tool's `Args`; `= _` initialises the field to the type's default value
// until it is assigned.
// NOTE(review): presumably the parsed command line arguments - the `Args` type and the
// assignment are not visible in this excerpt, confirm against the rest of the object.
protected
var
cmdArgs
:
Args
=
_
// List of field names stored as strings.
// NOTE(review): judging by the name, these are the genotype fields used when none are
// selected explicitly - confirm against the option parsing code.
val
defaultGenotypeFields
=
List
(
"DP"
,
"GQ"
,
"AD"
,
"AD-ref"
,
"AD-alt"
,
"AD-used"
,
"AD-not_used"
,
"general"
)
...
...
@@ -283,9 +201,9 @@ object VcfStats extends ToolCommand {
val
query
=
reader
.
query
(
interval
.
getContig
,
interval
.
getStart
,
interval
.
getEnd
)
if
(!
query
.
hasNext
)
{
mergeNestedStatsMap
(
stats
.
generalStats
,
fillGeneral
(
adInfoTags
))
Stats
.
mergeNestedStatsMap
(
stats
.
generalStats
,
fillGeneral
(
adInfoTags
))
for
(
sample
<-
samples
)
yield
{
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample
).
genotypeStats
,
fillGenotype
(
adGenotypeTags
))
Stats
.
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample
).
genotypeStats
,
fillGenotype
(
adGenotypeTags
))
}
chunkCounter
+=
1
}
...
...
@@ -293,10 +211,10 @@ object VcfStats extends ToolCommand {
for
(
record
<-
query
if
record
.
getStart
<=
interval
.
getEnd
)
{
mergeNestedStatsMap
(
stats
.
generalStats
,
checkGeneral
(
record
,
adInfoTags
))
Stats
.
mergeNestedStatsMap
(
stats
.
generalStats
,
checkGeneral
(
record
,
adInfoTags
))
for
(
sample1
<-
samples
)
yield
{
val
genotype
=
record
.
getGenotype
(
sample1
)
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample1
).
genotypeStats
,
checkGenotype
(
record
,
genotype
,
adGenotypeTags
))
Stats
.
mergeNestedStatsMap
(
stats
.
samplesStats
(
sample1
).
genotypeStats
,
checkGenotype
(
record
,
genotype
,
adGenotypeTags
))
for
(
sample2
<-
samples
)
{
val
genotype2
=
record
.
getGenotype
(
sample2
)
if
(
genotype
.
getAlleles
==
genotype2
.
getAlleles
)
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfStatsTest.scala
View file @
6aefe25a
...
...
@@ -15,11 +15,12 @@
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.nio.file.
{
Files
,
Paths
}
import
java.nio.file.
{
Files
,
Paths
}
import
htsjdk.variant.variantcontext.Allele
import
htsjdk.variant.vcf.VCFFileReader
import
nl.lumc.sasc.biopet.tools.VcfStats._
import
nl.lumc.sasc.biopet.tools.vcfstats.VcfStats
import
nl.lumc.sasc.biopet.tools.vcfstats.VcfStats._
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment