Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
fbe97ce0
Commit
fbe97ce0
authored
Jul 21, 2016
by
Peter van 't Hof
Browse files
Added test to Stats class
parent
7a3e8399
Changes
6
Hide whitespace changes
Inline
Side-by-side
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala
View file @
fbe97ce0
...
...
@@ -4,7 +4,7 @@ import java.io.File
import
java.util.concurrent.TimeoutException
import
htsjdk.samtools.reference.FastaSequenceFile
import
htsjdk.samtools.
{
SAMSequenceDictionary
,
SamReaderFactory
}
import
htsjdk.samtools.
{
SAMSequenceDictionary
,
SamReaderFactory
}
import
nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck
import
nl.lumc.sasc.biopet.utils.ToolCommand
import
nl.lumc.sasc.biopet.utils.intervals.
{
BedRecord
,
BedRecordList
}
...
...
@@ -13,6 +13,7 @@ import scala.collection.JavaConversions._
import
scala.concurrent.ExecutionContext.Implicits.global
import
scala.concurrent.duration._
import
scala.concurrent.
{
Await
,
Future
}
import
scala.language.postfixOps
/**
* This tool will collect stats from a bamfile
...
...
@@ -76,14 +77,14 @@ object BamStats extends ToolCommand {
}
/**
* This is the main running function of [[BamStats]]. This will start the threads and collect and write the results.
*
* @param outputDir All output files will be placed here
* @param bamFile Input bam file
* @param referenceDict Dict for scattering
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
* This is the main running function of [[BamStats]]. This will start the threads and collect and write the results.
*
* @param outputDir All output files will be placed here
* @param bamFile Input bam file
* @param referenceDict Dict for scattering
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
def
init
(
outputDir
:
File
,
bamFile
:
File
,
referenceDict
:
SAMSequenceDictionary
,
binSize
:
Int
,
threadBinSize
:
Int
)
:
Unit
=
{
val
contigsFutures
=
BedRecordList
.
fromDict
(
referenceDict
).
allRecords
.
map
{
contig
=>
Future
{
processContig
(
contig
,
bamFile
,
binSize
,
threadBinSize
)
}
...
...
@@ -105,28 +106,28 @@ object BamStats extends ToolCommand {
}
/**
* This will start the subjobs for each contig and collect [[Stats]] on contig level
*
* @param region Region to check, mostly this is the complete contig
* @param bamFile Input bam file
* @param binSize stats binsize
* @param threadBinSize Thread binsize
* @return Output stats
*/
* This will start the subjobs for each contig and collect [[Stats]] on contig level
*
* @param region Region to check, mostly this is the complete contig
* @param bamFile Input bam file
* @param binSize stats binsize
* @param threadBinSize Thread binsize
* @return Output stats
*/
def
processContig
(
region
:
BedRecord
,
bamFile
:
File
,
binSize
:
Int
,
threadBinSize
:
Int
)
:
Stats
=
{
val
scattersFutures
=
region
.
scatter
(
binSize
)
.
grouped
((
region
.
length
.
toDouble
/
threadBinSize
).
ceil
.
toInt
)
.
map
(
scatters
=>
Future
{
processThread
(
scatters
,
bamFile
)
})
.
map
(
scatters
=>
Future
{
processThread
(
scatters
,
bamFile
)
})
waitOnFutures
(
scattersFutures
.
toList
,
Some
(
region
.
chr
))
}
/**
* This method will wait until all futures are complete and collect a single [[Stats]] instance
* @param futures List of futures to monitor
* @param msg Optional message for logging
* @return Output stats
*/
* This method will wait until all futures are complete and collect a single [[Stats]] instance
* @param futures List of futures to monitor
* @param msg Optional message for logging
* @return Output stats
*/
def
waitOnFutures
(
futures
:
List
[
Future
[
Stats
]],
msg
:
Option
[
String
]
=
None
)
:
Stats
=
{
msg
.
foreach
(
m
=>
logger
.
info
(
s
"Start monitoring jobs for '$m', ${futures.size} jobs"
))
futures
.
foreach
(
_
.
onFailure
{
case
t
=>
throw
new
RuntimeException
(
t
)
})
...
...
@@ -148,12 +149,12 @@ object BamStats extends ToolCommand {
}
/**
* This method will process 1 thread bin
*
* @param scatters bins to check
* @param bamFile Input bamfile
* @return Output stats
*/
* This method will process 1 thread bin
*
* @param scatters bins to check
* @param bamFile Input bamfile
* @return Output stats
*/
def
processThread
(
scatters
:
List
[
BedRecord
],
bamFile
:
File
)
:
Stats
=
{
val
totalStats
=
Stats
()
val
sortedScatters
=
scatters
.
sortBy
(
_
.
start
)
...
...
@@ -201,10 +202,10 @@ object BamStats extends ToolCommand {
}
/**
* This method will only count the unmapped fragments
* @param bamFile Input bamfile
* @return Output stats
*/
* This method will only count the unmapped fragments
* @param bamFile Input bamfile
* @return Output stats
*/
def
processUnmappedReads
(
bamFile
:
File
)
:
Stats
=
{
val
stats
=
Stats
()
val
samReader
=
SamReaderFactory
.
makeDefault
().
open
(
bamFile
)
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala
View file @
fbe97ce0
...
...
@@ -34,6 +34,13 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
counts
.
keys
.
toList
.
sorted
.
foreach
(
x
=>
writer
.
println
(
s
"$x\t${counts(x)}"
))
writer
.
close
()
}
/**
 * Compares this instance with `other` by content.
 *
 * Another [[Counts]] is equal when its `counts` compare equal (`==`) to the
 * `counts` of this instance; a value of any other type is never equal.
 *
 * @param other Object to compare with
 * @return true when `other` is a [[Counts]] holding equal counts
 */
override def equals(other: Any): Boolean = other match {
  // The type argument is erased at runtime, so match on the wildcard type
  // instead of the unchecked `Counts[T]` pattern.
  case that: Counts[_] => this.counts == that.counts
  case _               => false
}

/**
 * Hash code derived from the same `counts` that [[equals]] compares, so that
 * equal instances always report the same hash code.
 *
 * NOTE(review): the value follows the current content of `counts`; avoid
 * changing an instance while it is used as a key in a hash-based collection.
 *
 * @return hash code of the counts
 */
override def hashCode(): Int = counts.hashCode()
}
class
Histogram
[
T
](
_counts
:
Map
[
T
,
Long
]
=
Map
[
T
,
Long
]())(
implicit
ord
:
Numeric
[
T
])
extends
Counts
[
T
](
_counts
)
{
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala
View file @
fbe97ce0
...
...
@@ -3,18 +3,16 @@ package nl.lumc.sasc.biopet.tools.bamstats
/**
* Created by pjvanthof on 05/07/16.
*/
case
class
Stats
()
{
var
totalReads
=
0L
var
unmapped
=
0L
var
secondary
=
0L
val
mappingQualityHistogram
=
new
Histogram
[
Int
]()
val
insertSizeHistogram
=
new
Histogram
[
Int
]()
val
clippingHistogram
=
new
Histogram
[
Int
]()
val
leftClippingHistogram
=
new
Histogram
[
Int
]()
val
rightClippingHistogram
=
new
Histogram
[
Int
]()
val
_5_ClippingHistogram
=
new
Histogram
[
Int
]()
val
_3_ClippingHistogram
=
new
Histogram
[
Int
]()
case
class
Stats
(
var
totalReads
:
Long
=
0L
,
var
unmapped
:
Long
=
0L
,
var
secondary
:
Long
=
0L
,
mappingQualityHistogram
:
Histogram
[
Int
]
=
new
Histogram
[
Int
](),
insertSizeHistogram
:
Histogram
[
Int
]
=
new
Histogram
[
Int
](),
clippingHistogram
:
Histogram
[
Int
]
=
new
Histogram
[
Int
](),
leftClippingHistogram
:
Histogram
[
Int
]
=
new
Histogram
[
Int
](),
rightClippingHistogram
:
Histogram
[
Int
]
=
new
Histogram
[
Int
](),
_5_ClippingHistogram
:
Histogram
[
Int
]
=
new
Histogram
[
Int
](),
_3_ClippingHistogram
:
Histogram
[
Int
]
=
new
Histogram
[
Int
]())
{
/** This will add another [[Stats]] into `this` */
def
+=
(
other
:
Stats
)
:
Stats
=
{
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/CountsTest.scala
View file @
fbe97ce0
...
...
@@ -9,12 +9,12 @@ import org.testng.annotations.Test
import
scala.io.Source
/**
* Created by pjvan_thof on 19-7-16.
*/
* Created by pjvan_thof on 19-7-16.
*/
class
CountsTest
extends
TestNGSuite
with
Matchers
{
@Test
def
testValues
:
Unit
=
{
val
data
:
Map
[
String
,
Long
]
=
Map
(
"1"
->
1
,
"2"
->
2
,
"3"
->
3
)
def
testValues
()
:
Unit
=
{
val
data
:
Map
[
String
,
Long
]
=
Map
(
"1"
->
1
,
"2"
->
2
,
"3"
->
3
)
val
c1
=
new
Counts
[
String
](
data
)
c1
.
countsMap
shouldBe
data
c1
.
get
(
"1"
)
shouldBe
Some
(
1
)
...
...
@@ -37,14 +37,36 @@ class CountsTest extends TestNGSuite with Matchers {
}
@Test
def
testEmpty
:
Unit
=
{
def
testEmpty
()
:
Unit
=
{
val
c1
=
new
Counts
[
Int
]()
c1
.
countsMap
.
isEmpty
shouldBe
true
}
@Test
def
testTsv
:
Unit
=
{
val
data
:
Map
[
Int
,
Long
]
=
Map
(
1
->
1
,
2
->
2
,
3
->
3
)
def testEqual(): Unit = {
  // Start from two independent, empty counters.
  val first = new Counts[Int]()
  val second = new Counts[Int]()

  // A counter is never equal to a value of an unrelated type.
  first should not be "be a string"

  // Both empty: each equals itself and they equal each other.
  first shouldBe first
  second shouldBe second
  first shouldBe second

  // Only the first counter receives a value, so they must differ now.
  first.add(1)
  first shouldBe first
  second shouldBe second
  first should not be second

  // The second counter receives the same value, restoring equality.
  second.add(1)
  first shouldBe first
  second shouldBe second
  first shouldBe second
}
@Test
def
testTsv
()
:
Unit
=
{
val
data
:
Map
[
Int
,
Long
]
=
Map
(
1
->
1
,
2
->
2
,
3
->
3
)
val
c1
=
new
Counts
[
Int
](
data
)
val
tsvFile
=
File
.
createTempFile
(
"counts."
,
".tsv"
)
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/HistogramTest.scala
View file @
fbe97ce0
...
...
@@ -9,12 +9,12 @@ import org.testng.annotations.Test
import
scala.io.Source
/**
* Created by pjvan_thof on 19-7-16.
*/
* Created by pjvan_thof on 19-7-16.
*/
class
HistogramTest
extends
TestNGSuite
with
Matchers
{
@Test
def
testValues
:
Unit
=
{
val
data
:
Map
[
Int
,
Long
]
=
Map
(
1
->
1
,
2
->
2
,
3
->
3
)
val
data
:
Map
[
Int
,
Long
]
=
Map
(
1
->
1
,
2
->
2
,
3
->
3
)
val
c1
=
new
Histogram
[
Int
](
data
)
c1
.
countsMap
shouldBe
data
c1
.
get
(
1
)
shouldBe
Some
(
1
)
...
...
@@ -44,7 +44,7 @@ class HistogramTest extends TestNGSuite with Matchers {
@Test
def
testTsv
:
Unit
=
{
val
data
:
Map
[
Int
,
Long
]
=
Map
(
1
->
1
,
2
->
2
,
3
->
3
)
val
data
:
Map
[
Int
,
Long
]
=
Map
(
1
->
1
,
2
->
2
,
3
->
3
)
val
c1
=
new
Histogram
[
Int
](
data
)
val
tsvFile
=
File
.
createTempFile
(
"counts."
,
".tsv"
)
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/StatsTest.scala
0 → 100644
View file @
fbe97ce0
package
nl.lumc.sasc.biopet.tools.bamstats
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
/**
 * Created by pjvan_thof on 19-7-16.
 *
 * Tests for [[Stats]]: equality, the initial (empty) state and merging with `+=`.
 */
class StatsTest extends TestNGSuite with Matchers {

  /** Equality must follow both the plain counters and the histogram content. */
  @Test
  def testEqual(): Unit = {
    val s1 = Stats()
    val s2 = Stats()

    // Two fresh instances are equal.
    s1 shouldBe s2

    // A counter that differs breaks equality until the other side catches up.
    s1.totalReads += 1
    s1 should not be s2
    s2.totalReads += 1
    s1 shouldBe s2

    // The same holds for a value added to one of the histograms.
    s1.mappingQualityHistogram.add(1)
    s1 should not be s2
    s2.mappingQualityHistogram.add(1)
    s1 shouldBe s2
  }

  /** A fresh instance has all counters at zero and all histograms empty. */
  @Test
  def testEmpty(): Unit = {
    val stats = Stats()

    stats.totalReads shouldBe 0
    stats.unmapped shouldBe 0
    stats.secondary shouldBe 0

    stats.clippingHistogram.countsMap shouldBe empty
    stats.insertSizeHistogram.countsMap shouldBe empty
    stats.mappingQualityHistogram.countsMap shouldBe empty
    stats.leftClippingHistogram.countsMap shouldBe empty
    stats.rightClippingHistogram.countsMap shouldBe empty
    stats._5_ClippingHistogram.countsMap shouldBe empty
    stats._3_ClippingHistogram.countsMap shouldBe empty
  }

  /** `+=` must merge the counters and histograms of the argument into the receiver. */
  @Test
  def testPlus(): Unit = {
    val s1 = Stats()
    val s2 = Stats()

    // Only s2 carries data before the merge.
    s2.totalReads += 1
    s2._3_ClippingHistogram.add(1)

    s1.totalReads shouldBe 0
    s1._3_ClippingHistogram.get(1) shouldBe None

    s1 += s2

    // After the merge s1 reflects the content of s2.
    s1.totalReads shouldBe 1
    s1._3_ClippingHistogram.get(1) shouldBe Some(1)
  }
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment