Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
658b77f5
Commit
658b77f5
authored
Mar 09, 2016
by
Wai Yi Leung
Browse files
Merge branch 'develop' into feature-mergesvcalls
parents
7e973ac3
b34fc129
Changes
13
Hide whitespace changes
Inline
Side-by-side
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala
View file @
658b77f5
...
...
@@ -194,10 +194,11 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
protected
def
sampleIds
:
Set
[
String
]
=
ConfigUtils
.
any2map
(
globalConfig
.
map
(
"samples"
)).
keySet
protected
lazy
val
nameRegex
=
"""^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$"""
.
r
protected
lazy
val
nameError
=
"
name
invalid
.
"
+
"
N
am
e
must have at least 3 characters,"
+
protected
lazy
val
nameError
=
"
has an
invalid
name.
"
+
"
S
am
ple names
must have at least 3 characters,
"
+
"must begin and end with an alphanumeric character, "
+
"and must not have whitespace."
"and must not have whitespace and special characters. "
+
"Dash (-) and underscore (_) are permitted."
/** Runs addAndTrackJobs method for each sample */
final
def
addSamplesJobs
()
{
...
...
public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/report/ReportBuilder.scala
View file @
658b77f5
...
...
@@ -93,9 +93,9 @@ trait ReportBuilder extends ToolCommand {
private
var
total
=
0
private
var
_sampleId
:
Option
[
String
]
=
None
protected
def
sampleId
=
_sampleId
protected
[
report
]
def
sampleId
=
_sampleId
private
var
_libId
:
Option
[
String
]
=
None
protected
def
libId
=
_libId
protected
[
report
]
def
libId
=
_libId
case
class
ExtFile
(
resourcePath
:
String
,
targetPath
:
String
)
...
...
@@ -152,6 +152,8 @@ trait ReportBuilder extends ToolCommand {
total
=
ReportBuilder
.
countPages
(
indexPage
)
logger
.
info
(
total
+
" pages to be generated"
)
done
=
0
logger
.
info
(
"Generate pages"
)
val
jobs
=
generatePage
(
summary
,
indexPage
,
cmdArgs
.
outputDir
,
args
=
pageArgs
++
cmdArgs
.
pageArgs
.
toMap
++
...
...
@@ -216,7 +218,7 @@ object ReportBuilder {
protected
val
engine
=
new
TemplateEngine
()
/** Cache of temp file for templates from the classpath / jar */
private
var
templateCache
:
Map
[
String
,
File
]
=
Map
()
private
[
report
]
var
templateCache
:
Map
[
String
,
File
]
=
Map
()
/** This will give the total number of pages including all nested pages */
def
countPages
(
page
:
ReportPage
)
:
Int
=
{
...
...
public/biopet-core/src/test/resources/empty_summary.json
0 → 100644
View file @
658b77f5
{
"samples"
:
{
"sampleName"
:
{
"libraries"
:
{
"libName"
:
{
}
}
}
}
}
\ No newline at end of file
public/biopet-core/src/test/resources/template.ssp
0 → 100644
View file @
658b77f5
<%@ var arg: String%>
${arg}
\ No newline at end of file
public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala
View file @
658b77f5
...
...
@@ -4,13 +4,14 @@ import java.io.File
import
nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import
nl.lumc.sasc.biopet.core.extensions.Md5sum
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
nl.lumc.sasc.biopet.utils.
{
Logging
,
ConfigUtils
}
import
nl.lumc.sasc.biopet.utils.config.Config
import
org.broadinstitute.gatk.queue.QScript
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
scala.language.reflectiveCalls
import
scala.collection.mutable.ListBuffer
/**
...
...
@@ -90,6 +91,20 @@ class MultiSampleQScriptTest extends TestNGSuite with Matchers {
script
.
functions
.
size
shouldBe
1
}
@Test
def
testInvalidSampleName
:
Unit
=
{
val
script
=
MultiSampleQScriptTest
(
sample4
::
Nil
)
script
.
init
()
script
.
biopetScript
()
val
msg
=
script
.
getLastLogMessage
msg
shouldBe
"Sample 'Something.Invalid' has an invalid name. "
+
"Sample names must have at least 3 characters, "
+
"must begin and end with an alphanumeric character, "
+
"and must not have whitespace and special characters. "
+
"Dash (-) and underscore (_) are permitted."
}
}
object
MultiSampleQScriptTest
{
...
...
@@ -120,6 +135,10 @@ object MultiSampleQScriptTest {
"lib3"
->
Map
(
"test"
->
"3-3"
)
))))
val
sample4
=
Map
(
"samples"
->
Map
(
"Something.Invalid"
->
Map
(
"libraries"
->
Map
(
"lib1"
->
Map
(
"test"
->
"4-1"
)
))))
val
child
=
Map
(
"samples"
->
Map
(
"child"
->
Map
(
"tags"
->
Map
(
"gender"
->
"male"
,
"father"
->
"father"
,
"mother"
->
"mother"
))))
val
father
=
Map
(
"samples"
->
Map
(
"father"
->
Map
(
"tags"
->
Map
(
"gender"
->
"male"
))))
...
...
@@ -136,6 +155,11 @@ object MultiSampleQScriptTest {
.
foldLeft
(
Map
[
String
,
Any
]())
{
case
(
a
,
b
)
=>
ConfigUtils
.
mergeMaps
(
a
,
b
)
})
val
root
=
null
def
getLastLogMessage
:
String
=
{
Logging
.
errors
.
toList
.
last
.
getMessage
}
class
Sample
(
id
:
String
)
extends
AbstractSample
(
id
)
{
class
Library
(
id
:
String
)
extends
AbstractLibrary
(
id
)
{
/** Function that add library jobs */
...
...
public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/MultisampleReportBuilderTest.scala
0 → 100644
View file @
658b77f5
package
nl.lumc.sasc.biopet.core.report
import
java.io.File
import
java.nio.file.Paths
import
com.google.common.io.Files
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
/**
* Created by pjvanthof on 24/02/16.
*/
class
MultisampleReportBuilderTest
extends
TestNGSuite
with
Matchers
{
private
def
resourcePath
(
p
:
String
)
:
String
=
{
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
}
@Test
def
testGeneratePages
()
:
Unit
=
{
val
builder
=
new
MultisampleReportBuilder
{
def
reportName
:
String
=
"test"
def
indexPage
:
ReportPage
=
ReportPage
(
"Samples"
->
generateSamplesPage
(
Map
())
::
Nil
,
Nil
,
Map
())
def
samplePage
(
sampleId
:
String
,
args
:
Map
[
String
,
Any
])
:
ReportPage
=
ReportPage
(
"Libraries"
->
generateLibraryPage
(
Map
(
"sampleId"
->
Some
(
sampleId
)))
::
Nil
,
Nil
,
Map
())
def
libraryPage
(
sampleId
:
String
,
libraryId
:
String
,
args
:
Map
[
String
,
Any
])
=
ReportPage
(
Nil
,
Nil
,
Map
())
}
val
tempDir
=
Files
.
createTempDir
()
tempDir
.
deleteOnExit
()
val
args
=
Array
(
"-s"
,
resourcePath
(
"/empty_summary.json"
),
"-o"
,
tempDir
.
getAbsolutePath
)
builder
.
main
(
args
)
builder
.
extFiles
.
foreach
(
x
=>
new
File
(
tempDir
,
"ext"
+
File
.
separator
+
x
.
targetPath
)
should
exist
)
def
createFile
(
path
:
String*
)
=
new
File
(
tempDir
,
path
.
mkString
(
File
.
separator
))
createFile
(
"index.html"
)
should
exist
createFile
(
"Samples"
,
"index.html"
)
should
exist
createFile
(
"Samples"
,
"sampleName"
,
"index.html"
)
should
exist
createFile
(
"Samples"
,
"sampleName"
,
"Libraries"
,
"index.html"
)
should
exist
createFile
(
"Samples"
,
"sampleName"
,
"Libraries"
,
"libName"
,
"index.html"
)
should
exist
}
}
public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportBuilderTest.scala
0 → 100644
View file @
658b77f5
package
nl.lumc.sasc.biopet.core.report
import
java.io.File
import
java.nio.file.Paths
import
com.google.common.io.Files
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.
{
DataProvider
,
Test
}
/**
* Created by pjvanthof on 24/02/16.
*/
class
ReportBuilderTest
extends
TestNGSuite
with
Matchers
{
private
def
resourcePath
(
p
:
String
)
:
String
=
{
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
}
@DataProvider
(
name
=
"testGeneratePages"
)
def
generatePageProvider
=
{
val
sample
=
Array
(
Some
(
"sampleName"
),
None
)
val
lib
=
Array
(
Some
(
"libName"
),
None
)
val
nested
=
Array
(
false
,
true
)
for
(
s
<-
sample
;
l
<-
lib
;
n
<-
nested
)
yield
Array
(
s
,
l
,
n
)
}
@Test
(
dataProvider
=
"testGeneratePages"
)
def
testGeneratePages
(
sample
:
Option
[
String
],
lib
:
Option
[
String
],
nested
:
Boolean
)
:
Unit
=
{
val
builder
=
new
ReportBuilder
{
def
reportName
:
String
=
"test"
def
indexPage
:
ReportPage
=
ReportPage
(
(
if
(
nested
)
"p1"
->
ReportPage
(
Nil
,
Nil
,
Map
())
::
Nil
else
Nil
),
Nil
,
Map
())
}
val
tempDir
=
Files
.
createTempDir
()
tempDir
.
deleteOnExit
()
val
args
=
Array
(
"-s"
,
resourcePath
(
"/empty_summary.json"
),
"-o"
,
tempDir
.
getAbsolutePath
)
++
sample
.
map
(
x
=>
Array
(
"-a"
,
s
"sampleId=$x"
)).
getOrElse
(
Array
())
++
lib
.
map
(
x
=>
Array
(
"-a"
,
s
"libId=$x"
)).
getOrElse
(
Array
())
builder
.
main
(
args
)
builder
.
sampleId
shouldBe
sample
builder
.
libId
shouldBe
lib
builder
.
extFiles
.
foreach
(
x
=>
new
File
(
tempDir
,
"ext"
+
File
.
separator
+
x
.
targetPath
)
should
exist
)
new
File
(
tempDir
,
"index.html"
)
should
exist
new
File
(
tempDir
,
"p1"
+
File
.
separator
+
"index.html"
).
exists
()
shouldBe
nested
}
@Test
def
testCountPages
:
Unit
=
{
ReportBuilder
.
countPages
(
ReportPage
(
Nil
,
Nil
,
Map
()))
shouldBe
1
ReportBuilder
.
countPages
(
ReportPage
(
"p1"
->
ReportPage
(
Nil
,
Nil
,
Map
())
::
Nil
,
Nil
,
Map
()))
shouldBe
2
ReportBuilder
.
countPages
(
ReportPage
(
"p1"
->
ReportPage
(
Nil
,
Nil
,
Map
())
::
"p2"
->
ReportPage
(
Nil
,
Nil
,
Map
())
::
Nil
,
Nil
,
Map
()))
shouldBe
3
ReportBuilder
.
countPages
(
ReportPage
(
"p1"
->
ReportPage
(
"p1"
->
ReportPage
(
Nil
,
Nil
,
Map
())
::
Nil
,
Nil
,
Map
())
::
Nil
,
Nil
,
Map
()))
shouldBe
3
ReportBuilder
.
countPages
(
ReportPage
(
"p1"
->
ReportPage
(
Nil
,
Nil
,
Map
())
::
"p2"
->
ReportPage
(
"p1"
->
ReportPage
(
Nil
,
Nil
,
Map
())
::
Nil
,
Nil
,
Map
())
::
Nil
,
Nil
,
Map
()))
shouldBe
4
}
@Test
def
testRenderTemplate
:
Unit
=
{
ReportBuilder
.
templateCache
=
Map
()
ReportBuilder
.
templateCache
shouldBe
empty
ReportBuilder
.
renderTemplate
(
"/template.ssp"
,
Map
(
"arg"
->
"test"
))
shouldBe
"test"
ReportBuilder
.
templateCache
.
size
shouldBe
1
ReportBuilder
.
renderTemplate
(
"/template.ssp"
,
Map
(
"arg"
->
"bla"
))
shouldBe
"bla"
ReportBuilder
.
templateCache
.
size
shouldBe
1
}
}
public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/report/ReportSectionTest.scala
0 → 100644
View file @
658b77f5
package
nl.lumc.sasc.biopet.core.report
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
/**
* Created by pjvanthof on 24/02/16.
*/
class
ReportSectionTest
extends
TestNGSuite
with
Matchers
{
@Test
def
testSectionRender
:
Unit
=
{
ReportSection
(
"/template.ssp"
,
Map
(
"arg"
->
"test"
)).
render
()
shouldBe
"test"
ReportSection
(
"/template.ssp"
).
render
(
Map
(
"arg"
->
"test"
))
shouldBe
"test"
}
}
public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala
View file @
658b77f5
...
...
@@ -25,7 +25,7 @@ class GvcfToBed(val root: Configurable) extends ToolCommandFunction {
var
minQuality
:
Int
=
0
@Argument
(
doc
=
"inverse"
,
required
=
false
)
var
inverse
:
Boolean
=
fals
e
var
inverse
:
Option
[
File
]
=
Non
e
override
def
defaultCoreMemory
=
4.0
...
...
@@ -35,7 +35,7 @@ class GvcfToBed(val root: Configurable) extends ToolCommandFunction {
required
(
"-O"
,
outputBed
)
+
optional
(
"-S"
,
sample
)
+
optional
(
"--minGenomeQuality"
,
minQuality
)
+
condi
tional
(
inver
se
,
"--
inver
ted"
)
op
tional
(
"--
inver
tedOutputBed"
,
inver
se
)
}
}
public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala
View file @
658b77f5
...
...
@@ -15,10 +15,10 @@
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.
{
PrintWriter
,
File
}
import
java.io.
{
File
,
PrintWriter
}
import
htsjdk.samtools.fastq.
{
FastqReader
,
FastqRecord
}
import
nl.lumc.sasc.biopet.utils.
{
ToolCommand
,
ConfigUtils
}
import
nl.lumc.sasc.biopet.utils.
{
ConfigUtils
,
ToolCommand
}
import
scala.collection.JavaConverters._
import
scala.collection.immutable.Map
...
...
@@ -26,7 +26,8 @@ import scala.collection.mutable
import
scala.language.postfixOps
/**
* Created by pjvanthof on 11/09/15.
* Created by wyleung on 01/12/14.
* Modified by pjvanthof and warindrarto on 27/06/2015
*/
object
SeqStat
extends
ToolCommand
{
...
...
@@ -41,12 +42,12 @@ object SeqStat extends ToolCommand {
var
nucs
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
// generate the baseHistogram and readHistogram
var
baseHistogram
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
var
readHistogram
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
var
base
Qual
Histogram
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
var
read
Qual
Histogram
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
var
nucleotideHistoMap
:
mutable.Map
[
Char
,
Long
]
=
mutable
.
Map
()
private
var
baseQualHistoMap
:
mutable.Map
[
Int
,
Long
]
=
mutable
.
Map
(
0
->
0
)
private
var
readQualHistoMap
:
mutable.Map
[
Int
,
Long
]
=
mutable
.
Map
(
0
->
0
)
private
var
readQual
GTE
HistoMap
:
mutable.Map
[
Int
,
Long
]
=
mutable
.
Map
(
0
->
0
)
case
class
Args
(
fastq
:
File
=
null
,
outputJson
:
Option
[
File
]
=
None
)
extends
AbstractArgs
...
...
@@ -83,14 +84,15 @@ object SeqStat extends ToolCommand {
*/
def
detectPhredEncoding
(
quals
:
mutable.ArrayBuffer
[
Long
])
:
Unit
=
{
// substract 1 on high value, because we start from index 0
val
l_
qual
=
quals
.
takeWhile
(
_
==
0
).
length
val
h_
qual
=
quals
.
length
-
1
val
qual
_low_boundery
=
quals
.
takeWhile
(
_
==
0
).
length
val
qual
_high_boundery
=
quals
.
length
-
1
(
l_
qual
<
59
,
h_
qual
>
74
)
match
{
(
qual
_low_boundery
<
59
,
qual
_high_boundery
>
74
)
match
{
case
(
false
,
true
)
=>
phredEncoding
=
Solexa
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the h_qual exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
case
(
true
,
true
)
=>
phredEncoding
=
Solexa
// this is definite a sanger sequence, the lower end is sanger only
case
(
true
,
false
)
=>
phredEncoding
=
Sanger
...
...
@@ -102,16 +104,18 @@ object SeqStat extends ToolCommand {
// 'nuc' are the nucleotides 'ACTGN', the max ASCII value for this is T, pre-init the ArrayBuffer to this value
// as we don't expect the have other 'higher' numbered Nucleotides for now.
case
class
BaseStat
(
qual
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
(),
nuc
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
.
fill
(
'T'
.
toInt
+
1
)(
0
))
nuc
s
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
.
fill
(
'T'
.
toInt
+
1
)(
0
))
case
class
ReadStat
(
qual
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
(),
nuc
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
.
fill
(
'T'
.
toInt
+
1
)(
0
),
nuc
s
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
.
fill
(
'T'
.
toInt
+
1
)(
0
),
var
withN
:
Long
=
0L
,
lengths
:
mutable.ArrayBuffer
[
Int
]
=
mutable
.
ArrayBuffer
())
val
baseStats
:
mutable.ArrayBuffer
[
BaseStat
]
=
mutable
.
ArrayBuffer
()
val
readStats
:
ReadStat
=
new
ReadStat
()
var
readLengthHistogram
:
mutable.Map
[
String
,
Long
]
=
mutable
.
Map
.
empty
/**
* Compute the quality metric per read
* Results are stored in baseStats and readStats
...
...
@@ -130,25 +134,22 @@ object SeqStat extends ToolCommand {
readStats
.
lengths
++=
mutable
.
ArrayBuffer
.
fill
(
record
.
length
-
readStats
.
lengths
.
length
+
1
)(
0
)
}
val
readQual
=
record
.
getBaseQualityString
val
readQual
ity
=
record
.
getBaseQualityString
val
readNucleotides
=
record
.
getReadString
if
(
record
.
length
>=
readStats
.
lengths
.
size
)
// Extends array when length not yet possible
(
0
to
(
record
.
length
-
readStats
.
lengths
.
size
)).
foreach
(
_
=>
readStats
.
lengths
.
append
(
0
))
readStats
.
lengths
(
record
.
length
)
+=
1
for
(
t
<-
0
until
record
.
length
())
{
if
(
baseStats
(
t
).
qual
.
length
<=
readQual
(
t
))
{
baseStats
(
t
).
qual
++=
mutable
.
ArrayBuffer
.
fill
(
readQual
(
t
).
toInt
-
baseStats
(
t
).
qual
.
length
+
1
)(
0
)
if
(
baseStats
(
t
).
qual
.
length
<=
readQual
ity
(
t
))
{
baseStats
(
t
).
qual
++=
mutable
.
ArrayBuffer
.
fill
(
readQual
ity
(
t
).
toInt
-
baseStats
(
t
).
qual
.
length
+
1
)(
0
)
}
baseStats
(
t
).
qual
(
readQual
(
t
))
+=
1
baseStats
(
t
).
nuc
(
readNucleotides
(
t
))
+=
1
readStats
.
nuc
(
readNucleotides
(
t
))
+=
1
baseStats
(
t
).
qual
(
readQual
ity
(
t
))
+=
1
baseStats
(
t
).
nuc
s
(
readNucleotides
(
t
))
+=
1
readStats
.
nuc
s
(
readNucleotides
(
t
))
+=
1
}
// implicit conversion to Int using foldLeft(0)
val
avgQual
:
Int
=
readQual
.
sum
/
readQual
.
length
val
avgQual
:
Int
=
readQual
ity
.
sum
/
readQual
ity
.
length
if
(
readStats
.
qual
.
length
<=
avgQual
)
{
readStats
.
qual
++=
mutable
.
ArrayBuffer
.
fill
(
avgQual
-
readStats
.
qual
.
length
+
1
)(
0
)
}
...
...
@@ -179,74 +180,53 @@ object SeqStat extends ToolCommand {
if
(
quals
.
length
<=
baseStats
(
pos
).
qual
.
length
)
{
quals
++=
mutable
.
ArrayBuffer
.
fill
(
baseStats
(
pos
).
qual
.
length
-
quals
.
length
)(
0
)
}
if
(
nucs
.
length
<=
baseStats
(
pos
).
nuc
.
length
)
{
for
(
_
<-
nucs
.
length
until
baseStats
(
pos
).
nuc
.
length
)
nucs
.
append
(
0
)
if
(
nucs
.
length
<=
baseStats
(
pos
).
nuc
s
.
length
)
{
nucs
++=
mutable
.
ArrayBuffer
.
fill
(
baseStats
(
pos
).
nuc
s
.
length
-
nucs
.
length
)
(
0
)
}
// count into the quals
baseStats
(
pos
).
qual
.
zipWithIndex
foreach
{
case
(
value
,
index
)
=>
quals
(
index
)
+=
value
}
// count N into nucs
baseStats
(
pos
).
nuc
.
zipWithIndex
foreach
{
case
(
value
,
index
)
=>
nucs
(
index
)
+=
value
}
baseStats
(
pos
).
nuc
s
.
zipWithIndex
foreach
{
case
(
value
,
index
)
=>
nucs
(
index
)
+=
value
}
}
detectPhredEncoding
(
quals
)
logger
.
debug
(
"Detected '"
+
phredEncoding
.
toString
.
toLowerCase
+
"' encoding in fastq file ..."
)
for
(
pos
<-
nucs
.
indices
)
{
// always export the N-nucleotide
if
(
nucs
(
pos
)
>
0
||
pos
.
toChar
==
'N'
)
{
nucleotideHistoMap
+=
(
pos
.
toChar
->
nucs
(
pos
))
}
}
nucleotideHistoMap
=
nucs
.
toList
.
foldLeft
(
mutable
.
Map
[
Char
,
Long
]())(
(
output
,
nucleotideCount
)
=>
output
+
(
output
.
size
.
toChar
->
nucleotideCount
)
)
// ensure bases: `ACTGN` is always reported even having a zero count.
// Other chars might be counted also, these are also reported
.
retain
((
nucleotide
,
count
)
=>
(
count
>
0
||
"ACTGN"
.
contains
(
nucleotide
.
toString
)))
// init baseHistogram with the bounderies of the report values
for
(
pos
<-
0
until
reportValues
.
max
+
1
)
{
baseHistogram
.
append
(
0
)
readHistogram
.
append
(
0
)
}
baseQualHistogram
=
quals
.
slice
(
phredEncoding
.
id
,
quals
.
size
)
baseQualHistogram
++=
mutable
.
ArrayBuffer
.
fill
(
reportValues
.
max
+
1
-
baseQualHistogram
.
size
)(
0L
)
for
(
pos
<-
quals
.
indices
)
{
val
key
:
Int
=
pos
-
phredEncoding
.
id
if
(
key
>=
0
)
{
baseHistogram
(
key
)
+=
quals
(
pos
)
}
}
readQualHistogram
=
readStats
.
qual
.
slice
(
phredEncoding
.
id
,
readStats
.
qual
.
size
)
readQualHistogram
++=
mutable
.
ArrayBuffer
.
fill
(
reportValues
.
max
+
1
-
readQualHistogram
.
size
)(
0L
)
for
(
pos
<-
readStats
.
qual
.
indices
)
{
val
key
:
Int
=
pos
-
phredEncoding
.
id
if
(
key
>
0
)
{
// count till the max of baseHistogram.length
for
(
histokey
<-
0
until
key
+
1
)
{
readHistogram
(
histokey
)
+=
readStats
.
qual
(
pos
)
readQualGTEHistoMap
=
readQualHistogram
.
indices
.
foldLeft
(
mutable
.
Map
[
Int
,
Long
]())(
(
output
,
index
)
=>
{
output
+
(
output
.
keys
.
size
->
readQualHistogram
.
slice
(
index
,
readQualHistogram
.
size
).
sum
)
}
}
}
for
(
pos
<-
readHistogram
.
indices
)
{
readQualHistoMap
+=
(
pos
->
readHistogram
(
pos
))
}
)
}
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
val
commandArgs
:
Args
=
parseArgs
(
args
)
logger
.
info
(
"Start seqstat"
)
seqStat
(
new
FastqReader
(
commandArgs
.
fastq
))
summarize
()
logger
.
info
(
"Seqstat done"
)
val
report
:
Map
[
String
,
Any
]
=
Map
(
def
reportMap
(
fastqPath
:
File
)
:
Map
[
String
,
Any
]
=
{
Map
(
(
"files"
,
Map
(
(
"fastq"
,
Map
(
(
"path"
,
commandArgs
.
fastq
))
(
"path"
,
fastqPath
.
getAbsolutePath
))
)
)
),
(
"stats"
,
Map
(
(
"bases"
,
Map
(
(
"num_total"
,
nucleotideHistoMap
.
values
.
sum
),
(
"num_qual"
,
baseHistogram
.
toList
),
(
"num_qual"
,
base
Qual
Histogram
.
toList
),
(
"nucleotides"
,
nucleotideHistoMap
.
toMap
)
)),
(
"reads"
,
Map
(
...
...
@@ -254,11 +234,24 @@ object SeqStat extends ToolCommand {
(
"num_total"
,
readStats
.
qual
.
sum
),
(
"len_min"
,
readStats
.
lengths
.
takeWhile
(
_
==
0
).
length
),
(
"len_max"
,
readStats
.
lengths
.
length
-
1
),
(
"num_avg_qual_gte"
,
readQualHistoMap
.
toMap
),
(
"qual_encoding"
,
phredEncoding
.
toString
.
toLowerCase
)
(
"num_avg_qual_gte"
,
readQualGTEHistoMap
.
toMap
),
(
"qual_encoding"
,
phredEncoding
.
toString
.
toLowerCase
),
(
"len_histogram"
,
readStats
.
lengths
.
toList
)
))
))
)
}
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
val
commandArgs
:
Args
=
parseArgs
(
args
)
logger
.
info
(
"Start seqstat"
)
seqStat
(
new
FastqReader
(
commandArgs
.
fastq
))
summarize
()
logger
.
info
(
"Seqstat done"
)
val
report
=
reportMap
(
commandArgs
.
fastq
)
commandArgs
.
outputJson
match
{
case
Some
(
file
)
=>
{
...
...
public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala
View file @
658b77f5
...
...
@@ -12,6 +12,8 @@ import org.scalatest.testng.TestNGSuite
import
GvcfToBed._
import
org.testng.annotations.Test
import
scala.io.Source
/**
* Created by ahbbollen on 13-10-15.
*/
...
...
@@ -39,4 +41,46 @@ class GvcfToBedTest extends TestNGSuite with Matchers with MockitoSugar {
VcfUtils
.
hasMinGenomeQuality
(
record2
,
"Sample_102"
,
3
)
shouldBe
true
VcfUtils
.
hasMinGenomeQuality
(
record2
,
"Sample_102"
,
99
)
shouldBe
false
}
@Test
def
testGvcfToBedOutput
=
{
val
tmp
=
File
.
createTempFile
(
"gvcf2bedtest"
,
".bed"
)
tmp
.
deleteOnExit
()
val
args
:
Array
[
String
]
=
Array
(
"-I"
,
unvepped
.
getAbsolutePath
,
"-O"
,
tmp
.
getAbsolutePath
,
"-S"
,
"Sample_101"
,
"--minGenomeQuality"
,
"99"
)
main
(
args
)
Source
.
fromFile
(
tmp
).
getLines
().
size
shouldBe
0
val
tmp2
=
File
.
createTempFile
(
"gvcf2bedtest"
,
".bed"
)
tmp2
.
deleteOnExit
()
val
args2
:
Array
[
String
]
=
Array
(
"-I"
,
unvepped
.
getAbsolutePath
,
"-O"
,
tmp2
.
getAbsolutePath
,
"-S"
,
"Sample_102"
,
"--minGenomeQuality"
,
"2"
)
main
(
args2
)
Source
.
fromFile
(
tmp2
).
getLines
().
size
shouldBe
1
}
@Test
def
testGvcfToBedInvertedOutput
=
{
val
tmp
=
File
.
createTempFile
(
"gvcf2bedtest"
,
".bed"
)
val
tmp_inv
=
File
.
createTempFile
(
"gvcf2bedtest"
,
".bed"
)
tmp
.
deleteOnExit
()
tmp_inv
.
deleteOnExit
()
val
args
:
Array
[
String
]
=
Array
(
"-I"
,
unvepped
.
getAbsolutePath
,
"-O"
,
tmp
.
getAbsolutePath
,
"-S"
,
"Sample_101"
,