Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
6f02f6d8
Commit
6f02f6d8
authored
Mar 23, 2017
by
Peter van 't Hof
Browse files
Merge remote-tracking branch 'remotes/origin/epic-summary_reformat' into fix-BIOPET-587
parents
b83e9b26
4e8c4e4f
Changes
7
Hide whitespace changes
Inline
Side-by-side
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala
View file @
6f02f6d8
...
...
@@ -39,7 +39,8 @@ object BamStats extends ToolCommand {
bamFile
:
File
=
null
,
referenceFasta
:
Option
[
File
]
=
None
,
binSize
:
Int
=
10000
,
threadBinSize
:
Int
=
10000000
)
extends
AbstractArgs
threadBinSize
:
Int
=
10000000
,
tsvOutputs
:
Boolean
=
false
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'R'
,
"reference"
)
valueName
"<file>"
action
{
(
x
,
c
)
=>
...
...
@@ -57,6 +58,9 @@ object BamStats extends ToolCommand {
opt
[
Int
](
"threadBinSize"
)
valueName
"<int>"
action
{
(
x
,
c
)
=>
c
.
copy
(
threadBinSize
=
x
)
}
text
"Size of region per thread"
opt
[
Unit
](
"tsvOutputs"
)
action
{
(
x
,
c
)
=>
c
.
copy
(
tsvOutputs
=
true
)
}
text
"Also output tsv files, default there is only a json"
}
/** This is the main entry to [[BamStats]], this will do the argument parsing. */
...
...
@@ -68,7 +72,7 @@ object BamStats extends ToolCommand {
val
sequenceDict
=
validateReferenceInBam
(
cmdArgs
.
bamFile
,
cmdArgs
.
referenceFasta
)
init
(
cmdArgs
.
outputDir
,
cmdArgs
.
bamFile
,
sequenceDict
,
cmdArgs
.
binSize
,
cmdArgs
.
threadBinSize
)
init
(
cmdArgs
.
outputDir
,
cmdArgs
.
bamFile
,
sequenceDict
,
cmdArgs
.
binSize
,
cmdArgs
.
threadBinSize
,
cmdArgs
.
tsvOutputs
)
logger
.
info
(
"Done"
)
}
...
...
@@ -96,13 +100,26 @@ object BamStats extends ToolCommand {
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
def
init
(
outputDir
:
File
,
bamFile
:
File
,
referenceDict
:
SAMSequenceDictionary
,
binSize
:
Int
,
threadBinSize
:
Int
)
:
Unit
=
{
def
init
(
outputDir
:
File
,
bamFile
:
File
,
referenceDict
:
SAMSequenceDictionary
,
binSize
:
Int
,
threadBinSize
:
Int
,
tsvOutput
:
Boolean
)
:
Unit
=
{
val
contigsFutures
=
BedRecordList
.
fromDict
(
referenceDict
).
allRecords
.
map
{
contig
=>
contig
.
chr
->
processContig
(
contig
,
bamFile
,
binSize
,
threadBinSize
,
outputDir
)
}.
toList
val
stats
=
waitOnFutures
(
processUnmappedReads
(
bamFile
)
::
contigsFutures
.
map
(
_
.
_2
))
if
(
tsvOutput
)
{
stats
.
flagstat
.
writeAsTsv
(
new
File
(
outputDir
,
"flagstats.tsv"
))
stats
.
insertSizeHistogram
.
writeFilesAndPlot
(
outputDir
,
"insertsize"
,
"Insertsize"
,
"Reads"
,
"Insertsize distribution"
)
stats
.
mappingQualityHistogram
.
writeFilesAndPlot
(
outputDir
,
"mappingQuality"
,
"Mapping Quality"
,
"Reads"
,
"Mapping Quality distribution"
)
stats
.
clippingHistogram
.
writeFilesAndPlot
(
outputDir
,
"clipping"
,
"CLipped bases"
,
"Reads"
,
"Clipping distribution"
)
stats
.
leftClippingHistogram
.
writeFilesAndPlot
(
outputDir
,
"left_clipping"
,
"CLipped bases"
,
"Reads"
,
"Left Clipping distribution"
)
stats
.
rightClippingHistogram
.
writeFilesAndPlot
(
outputDir
,
"right_clipping"
,
"CLipped bases"
,
"Reads"
,
"Right Clipping distribution"
)
stats
.
_3_ClippingHistogram
.
writeFilesAndPlot
(
outputDir
,
"3prime_clipping"
,
"CLipped bases"
,
"Reads"
,
"3 Prime Clipping distribution"
)
stats
.
_5_ClippingHistogram
.
writeFilesAndPlot
(
outputDir
,
"5prime_clipping"
,
"CLipped bases"
,
"Reads"
,
"5 Prime Clipping distribution"
)
}
val
statsWriter
=
new
PrintWriter
(
new
File
(
outputDir
,
"bamstats.json"
))
val
totalStats
=
stats
.
toSummaryMap
val
statsMap
=
Map
(
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala
View file @
6f02f6d8
...
...
@@ -14,8 +14,10 @@
*/
package
nl.lumc.sasc.biopet.tools.bamstats
import
java.io.
{
File
,
PrintWriter
}
import
nl.lumc.sasc.biopet.utils.sortAnyAny
import
java.io.
{
File
,
IOException
,
PrintWriter
}
import
nl.lumc.sasc.biopet.utils.rscript.LinePlot
import
nl.lumc.sasc.biopet.utils.
{
Logging
,
sortAnyAny
}
import
scala.collection.mutable
...
...
@@ -43,7 +45,7 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
}
/** Write histogram to a tsv/count file */
def
writeToTsv
(
file
:
File
)
:
Unit
=
{
def
write
Histogram
ToTsv
(
file
:
File
)
:
Unit
=
{
val
writer
=
new
PrintWriter
(
file
)
writer
.
println
(
"value\tcount"
)
counts
.
keys
.
toList
.
sorted
.
foreach
(
x
=>
writer
.
println
(
s
"$x\t${counts(x)}"
))
...
...
@@ -82,4 +84,28 @@ class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric
}
else
Map
()
}
/** Write histogram to a tsv/count file */
def
writeAggregateToTsv
(
file
:
File
)
:
Unit
=
{
val
writer
=
new
PrintWriter
(
file
)
aggregateStats
.
foreach
(
x
=>
writer
.
println
(
x
.
_1
+
"\t"
+
x
.
_2
))
writer
.
close
()
}
def
writeFilesAndPlot
(
outputDir
:
File
,
prefix
:
String
,
xlabel
:
String
,
ylabel
:
String
,
title
:
String
)
:
Unit
=
{
writeHistogramToTsv
(
new
File
(
outputDir
,
prefix
+
".histogram.tsv"
))
writeAggregateToTsv
(
new
File
(
outputDir
,
prefix
+
".stats.tsv"
))
val
plot
=
new
LinePlot
(
null
)
plot
.
input
=
new
File
(
outputDir
,
prefix
+
".histogram.tsv"
)
plot
.
output
=
new
File
(
outputDir
,
prefix
+
".histogram.png"
)
plot
.
xlabel
=
Some
(
xlabel
)
plot
.
ylabel
=
Some
(
ylabel
)
plot
.
title
=
Some
(
title
)
try
{
plot
.
runLocal
()
}
catch
{
// If plotting fails the tools should not fail, this depens on R to be installed
case
e
:
IOException
=>
Logging
.
logger
.
warn
(
s
"Error found while plotting ${plot.output}: ${e.getMessage}"
)
}
}
}
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala
View file @
6f02f6d8
...
...
@@ -50,13 +50,13 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(),
def
writeStatsToFiles
(
outputDir
:
File
)
:
Unit
=
{
this
.
flagstat
.
writeReportToFile
(
new
File
(
outputDir
,
"flagstats"
))
this
.
flagstat
.
writeSummaryTofile
(
new
File
(
outputDir
,
"flagstats.summary.json"
))
this
.
mappingQualityHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"mapping_quality.tsv"
))
this
.
insertSizeHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"insert_size.tsv"
))
this
.
clippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"clipping.tsv"
))
this
.
leftClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"left_clipping.tsv"
))
this
.
rightClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"right_clipping.tsv"
))
this
.
_5_ClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"5_prime_clipping.tsv"
))
this
.
_3_ClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"3_prime_clipping.tsv"
))
this
.
mappingQualityHistogram
.
write
Histogram
ToTsv
(
new
File
(
outputDir
,
"mapping_quality.tsv"
))
this
.
insertSizeHistogram
.
write
Histogram
ToTsv
(
new
File
(
outputDir
,
"insert_size.tsv"
))
this
.
clippingHistogram
.
write
Histogram
ToTsv
(
new
File
(
outputDir
,
"clipping.tsv"
))
this
.
leftClippingHistogram
.
write
Histogram
ToTsv
(
new
File
(
outputDir
,
"left_clipping.tsv"
))
this
.
rightClippingHistogram
.
write
Histogram
ToTsv
(
new
File
(
outputDir
,
"right_clipping.tsv"
))
this
.
_5_ClippingHistogram
.
write
Histogram
ToTsv
(
new
File
(
outputDir
,
"5_prime_clipping.tsv"
))
this
.
_3_ClippingHistogram
.
write
Histogram
ToTsv
(
new
File
(
outputDir
,
"3_prime_clipping.tsv"
))
}
def
toSummaryMap
=
{
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/flagstat/FlagstatCollector.scala
View file @
6f02f6d8
...
...
@@ -32,6 +32,12 @@ class FlagstatCollector {
protected
[
FlagstatCollector
]
var
totalCounts
:
Array
[
Long
]
=
Array
()
protected
[
FlagstatCollector
]
var
crossCounts
=
Array
.
ofDim
[
Long
](
1
,
1
)
def
writeAsTsv
(
file
:
File
)
:
Unit
=
{
val
writer
=
new
PrintWriter
(
file
)
names
.
foreach
(
x
=>
writer
.
println
(
x
.
_2
+
"\t"
+
totalCounts
(
x
.
_1
)))
writer
.
close
()
}
def
loadDefaultFunctions
()
{
addFunction
(
"All"
,
record
=>
true
)
addFunction
(
"Mapped"
,
record
=>
!
record
.
getReadUnmappedFlag
)
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStatsTest.scala
View file @
6f02f6d8
...
...
@@ -34,6 +34,43 @@ class BamStatsTest extends TestNGSuite with Matchers {
new
File
(
outputDir
,
"bamstats.json"
)
should
exist
new
File
(
outputDir
,
"bamstats.summary.json"
)
should
exist
new
File
(
outputDir
,
"flagstats.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"insertsize.stats.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"insertsize.histogram.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"mappingQuality.stats.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"mappingQuality.histogram.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"clipping.stats.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"clipping.histogram.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"flagstats"
)
shouldNot
exist
new
File
(
outputDir
,
"flagstats.summary.json"
)
shouldNot
exist
new
File
(
outputDir
,
"mapping_quality.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"insert_size.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"clipping.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"left_clipping.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"right_clipping.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"5_prime_clipping.tsv"
)
shouldNot
exist
new
File
(
outputDir
,
"3_prime_clipping.tsv"
)
shouldNot
exist
}
@Test
def
testTsvOutputs
:
Unit
=
{
val
outputDir
=
Files
.
createTempDir
()
outputDir
.
deleteOnExit
()
BamStats
.
main
(
Array
(
"-b"
,
BamStatsTest
.
pairedBam01
.
getAbsolutePath
,
"-o"
,
outputDir
.
getAbsolutePath
,
"--tsvOutputs"
))
new
File
(
outputDir
,
"bamstats.json"
)
should
exist
new
File
(
outputDir
,
"bamstats.summary.json"
)
should
exist
new
File
(
outputDir
,
"flagstats.tsv"
)
should
exist
new
File
(
outputDir
,
"insertsize.stats.tsv"
)
should
exist
new
File
(
outputDir
,
"insertsize.histogram.tsv"
)
should
exist
new
File
(
outputDir
,
"mappingQuality.stats.tsv"
)
should
exist
new
File
(
outputDir
,
"mappingQuality.histogram.tsv"
)
should
exist
new
File
(
outputDir
,
"clipping.stats.tsv"
)
should
exist
new
File
(
outputDir
,
"clipping.histogram.tsv"
)
should
exist
new
File
(
outputDir
,
"flagstats"
)
shouldNot
exist
new
File
(
outputDir
,
"flagstats.summary.json"
)
shouldNot
exist
new
File
(
outputDir
,
"mapping_quality.tsv"
)
shouldNot
exist
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/CountsTest.scala
View file @
6f02f6d8
...
...
@@ -86,7 +86,7 @@ class CountsTest extends TestNGSuite with Matchers {
val
tsvFile
=
File
.
createTempFile
(
"counts."
,
".tsv"
)
tsvFile
.
deleteOnExit
()
c1
.
writeToTsv
(
tsvFile
)
c1
.
write
Histogram
ToTsv
(
tsvFile
)
val
reader
=
Source
.
fromFile
(
tsvFile
)
reader
.
getLines
().
toList
shouldBe
List
(
"value\tcount"
,
"1\t1"
,
"2\t2"
,
"3\t3"
)
...
...
biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/HistogramTest.scala
View file @
6f02f6d8
...
...
@@ -64,7 +64,7 @@ class HistogramTest extends TestNGSuite with Matchers {
val
tsvFile
=
File
.
createTempFile
(
"counts."
,
".tsv"
)
tsvFile
.
deleteOnExit
()
c1
.
writeToTsv
(
tsvFile
)
c1
.
write
Histogram
ToTsv
(
tsvFile
)
val
reader
=
Source
.
fromFile
(
tsvFile
)
reader
.
getLines
().
toList
shouldBe
List
(
"value\tcount"
,
"1\t1"
,
"2\t2"
,
"3\t3"
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment