Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
498358c5
Commit
498358c5
authored
Jul 21, 2016
by
Peter van 't Hof
Browse files
Added contig stats to files
parent
fbe97ce0
Changes
2
Show whitespace changes
Inline
Side-by-side
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala
View file @
498358c5
...
...
@@ -52,7 +52,7 @@ object BamStats extends ToolCommand {
logger
.
info
(
"Start"
)
val
sequenceDict
=
validateReference
i
nBam
(
cmdArgs
.
bamFile
,
cmdArgs
.
referenceFasta
)
val
sequenceDict
=
validateReference
I
nBam
(
cmdArgs
.
bamFile
,
cmdArgs
.
referenceFasta
)
init
(
cmdArgs
.
outputDir
,
cmdArgs
.
bamFile
,
sequenceDict
,
cmdArgs
.
binSize
,
cmdArgs
.
threadBinSize
)
...
...
@@ -63,7 +63,7 @@ object BamStats extends ToolCommand {
* This will retrieve the [[SAMSequenceDictionary]] from the bam file.
* When `referenceFasta is given he will validate this against the bam file.`
*/
def
validateReference
i
nBam
(
bamFile
:
File
,
referenceFasta
:
Option
[
File
])
=
{
def
validateReference
I
nBam
(
bamFile
:
File
,
referenceFasta
:
Option
[
File
])
=
{
val
samReader
=
SamReaderFactory
.
makeDefault
().
open
(
bamFile
)
val
samHeader
=
samReader
.
getFileHeader
samReader
.
close
()
...
...
@@ -87,7 +87,7 @@ object BamStats extends ToolCommand {
*/
def
init
(
outputDir
:
File
,
bamFile
:
File
,
referenceDict
:
SAMSequenceDictionary
,
binSize
:
Int
,
threadBinSize
:
Int
)
:
Unit
=
{
val
contigsFutures
=
BedRecordList
.
fromDict
(
referenceDict
).
allRecords
.
map
{
contig
=>
Future
{
processContig
(
contig
,
bamFile
,
binSize
,
threadBinSize
)
}
Future
{
processContig
(
contig
,
bamFile
,
binSize
,
threadBinSize
,
outputDir
)
}
}
val
unmappedFuture
=
Future
{
processUnmappedReads
(
bamFile
)
}
...
...
@@ -96,13 +96,7 @@ object BamStats extends ToolCommand {
logger
.
info
(
s
"total: ${stats.totalReads}, unmapped: ${stats.unmapped}, secondary: ${stats.secondary}"
)
stats
.
mappingQualityHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"mapping_quality.tsv"
))
stats
.
insertSizeHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"insert_size.tsv"
))
stats
.
clippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"clipping.tsv"
))
stats
.
leftClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"left_clipping.tsv"
))
stats
.
rightClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"right_clipping.tsv"
))
stats
.
_5_ClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"5_prime_clipping.tsv"
))
stats
.
_3_ClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"3_prime_clipping.tsv"
))
stats
.
writeStatsToFiles
(
outputDir
)
}
/**
...
...
@@ -114,16 +108,21 @@ object BamStats extends ToolCommand {
* @param threadBinSize Thread binsize
* @return Output stats
*/
def
processContig
(
region
:
BedRecord
,
bamFile
:
File
,
binSize
:
Int
,
threadBinSize
:
Int
)
:
Stats
=
{
def
processContig
(
region
:
BedRecord
,
bamFile
:
File
,
binSize
:
Int
,
threadBinSize
:
Int
,
outputDir
:
File
)
:
Stats
=
{
val
scattersFutures
=
region
.
scatter
(
binSize
)
.
grouped
((
region
.
length
.
toDouble
/
threadBinSize
).
ceil
.
toInt
)
.
map
(
scatters
=>
Future
{
processThread
(
scatters
,
bamFile
)
})
waitOnFutures
(
scattersFutures
.
toList
,
Some
(
region
.
chr
))
val
stats
=
waitOnFutures
(
scattersFutures
.
toList
,
Some
(
region
.
chr
))
val
contigDir
=
new
File
(
outputDir
,
"contigs"
+
File
.
separator
+
region
.
chr
)
contigDir
.
mkdirs
()
stats
.
writeStatsToFiles
(
contigDir
)
stats
}
/**
* This method will wait when all futures are complete and collect a single [[Stats]] instance
*
* @param futures List of futures to monitor
* @param msg Optional message for logging
* @return Output stats
...
...
@@ -151,7 +150,7 @@ object BamStats extends ToolCommand {
/**
* This method will process 1 thread bin
*
* @param scatters bins to check
* @param scatters bins to check
, there should be no gaps withing the scatters
* @param bamFile Input bamfile
* @return Output stats
*/
...
...
@@ -203,6 +202,7 @@ object BamStats extends ToolCommand {
/**
* This method will only count the unmapped fragments
*
* @param bamFile Input bamfile
* @return Output stats
*/
...
...
biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala
View file @
498358c5
package
nl.lumc.sasc.biopet.tools.bamstats
import
java.io.File
/**
* Created by pjvanthof on 05/07/16.
*/
...
...
@@ -27,4 +29,14 @@ case class Stats(var totalReads: Long = 0L,
this
.
_3_ClippingHistogram
+=
other
.
_3_ClippingHistogram
this
}
def
writeStatsToFiles
(
outputDir
:
File
)
:
Unit
=
{
this
.
mappingQualityHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"mapping_quality.tsv"
))
this
.
insertSizeHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"insert_size.tsv"
))
this
.
clippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"clipping.tsv"
))
this
.
leftClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"left_clipping.tsv"
))
this
.
rightClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"right_clipping.tsv"
))
this
.
_5_ClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"5_prime_clipping.tsv"
))
this
.
_3_ClippingHistogram
.
writeToTsv
(
new
File
(
outputDir
,
"3_prime_clipping.tsv"
))
}
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment