Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
3dc92c86
Commit
3dc92c86
authored
Feb 23, 2015
by
bow
Browse files
Merge branch 'feature-seqstat-scala' into 'develop'
Feature seqstat scala #34 Ready for testing See merge request !90
parents
190b961c
163386a3
Changes
9
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala
View file @
3dc92c86
...
...
@@ -99,8 +99,7 @@ object Seqstat {
(
"40"
:=
basesTotal
(
"40"
))
->:
(
"50"
:=
basesTotal
(
"50"
))
->:
(
"60"
:=
basesTotal
(
"60"
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
(
"reads"
:=
(
(
"num_with_n"
:=
readsTotal
(
"num_with_n"
))
->:
(
"num_total"
:=
readsTotal
(
"num_total"
))
->:
...
...
@@ -114,8 +113,7 @@ object Seqstat {
(
"40"
:=
readsTotal
(
"40"
))
->:
(
"50"
:=
readsTotal
(
"50"
))
->:
(
"60"
:=
readsTotal
(
"60"
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
(
"qual_encoding"
:=
encoding
.
head
)
->:
jEmptyObject
}
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/Seqstat.scala
0 → 100644
View file @
3dc92c86
/**
* Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
* @author Wai Yi Leung <w.y.leung@lumc.nl>
*
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
org.broadinstitute.gatk.utils.commandline.
{
Output
,
Input
}
import
scala.collection.JavaConverters._
import
scala.collection.mutable
import
scala.collection.immutable.Map
import
scala.io.Source
import
scala.language.postfixOps
import
htsjdk.samtools.fastq.
{
FastqReader
,
FastqRecord
}
import
scalaz._
,
Scalaz
.
_
import
argonaut._
,
Argonaut
.
_
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
nl.lumc.sasc.biopet.core.ToolCommand
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.utils.ConfigUtils
/**
* Seqstat function class for usage in Biopet pipelines
*
* @param root Configuration object for the pipeline
*/
class
Seqstat
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input FASTQ"
,
shortName
=
"input"
,
required
=
true
)
var
input
:
File
=
null
@Output
(
doc
=
"Output JSON"
,
shortName
=
"output"
,
required
=
true
)
var
output
:
File
=
null
override
val
defaultVmem
=
"4G"
memoryLimit
=
Option
(
3.0
)
override
def
commandLine
=
super
.
commandLine
+
required
(
"-i"
,
input
)
+
" > "
+
required
(
output
)
def
summary
:
Json
=
{
val
json
=
Parse
.
parseOption
(
Source
.
fromFile
(
output
).
mkString
)
if
(
json
.
isEmpty
)
jNull
else
json
.
get
.
fieldOrEmptyObject
(
"stats"
)
}
}
object
FqEncoding
extends
Enumeration
{
type
FqEncoding
=
Value
val
Sanger
=
Value
(
33
,
"Sanger"
)
val
Solexa
=
Value
(
64
,
"Solexa"
)
val
Unknown
=
Value
(
0
,
"Unknown"
)
}
object
Seqstat
extends
ToolCommand
{
def
apply
(
root
:
Configurable
,
input
:
File
,
output
:
File
)
:
Seqstat
=
{
val
seqstat
=
new
Seqstat
(
root
)
seqstat
.
input
=
input
seqstat
.
output
=
output
seqstat
}
def
apply
(
root
:
Configurable
,
fastqfile
:
File
,
outDir
:
String
)
:
Seqstat
=
{
val
seqstat
=
new
Seqstat
(
root
)
val
ext
=
fastqfile
.
getName
.
substring
(
fastqfile
.
getName
.
lastIndexOf
(
"."
))
seqstat
.
input
=
fastqfile
seqstat
.
output
=
new
File
(
outDir
+
fastqfile
.
getName
.
substring
(
0
,
fastqfile
.
getName
.
lastIndexOf
(
"."
))
+
".seqstats.json"
)
seqstat
}
def
mergeSummaries
(
jsons
:
List
[
Json
])
:
Json
=
{
def
addJson
(
json
:
Json
,
total
:
mutable.Map
[
String
,
Long
])
{
for
(
key
<-
json
.
objectFieldsOrEmpty
)
{
if
(
json
.
field
(
key
).
get
.
isObject
)
addJson
(
json
.
field
(
key
).
get
,
total
)
else
if
(
json
.
field
(
key
).
get
.
isNumber
)
{
val
number
=
json
.
field
(
key
).
get
.
numberOrZero
.
toLong
if
(
total
.
contains
(
key
))
{
if
(
key
==
"len_min"
)
{
if
(
total
(
key
)
>
number
)
total
(
key
)
=
number
}
else
if
(
key
==
"len_max"
)
{
if
(
total
(
key
)
<
number
)
total
(
key
)
=
number
}
else
total
(
key
)
+=
number
}
else
total
+=
(
key
->
number
)
}
}
}
var
basesTotal
:
mutable.Map
[
String
,
Long
]
=
mutable
.
Map
()
var
readsTotal
:
mutable.Map
[
String
,
Long
]
=
mutable
.
Map
()
var
encoding
:
Set
[
Json
]
=
Set
()
for
(
json
<-
jsons
)
{
encoding
+=
json
.
fieldOrEmptyString
(
"qual_encoding"
)
val
bases
=
json
.
fieldOrEmptyObject
(
"bases"
)
addJson
(
bases
,
basesTotal
)
val
reads
=
json
.
fieldOrEmptyObject
(
"reads"
)
addJson
(
reads
,
readsTotal
)
}
(
"bases"
:=
(
(
"num_n"
:=
basesTotal
(
"num_n"
))
->:
(
"num_total"
:=
basesTotal
(
"num_total"
))
->:
(
"num_qual_gte"
:=
(
(
"1"
:=
basesTotal
(
"1"
))
->:
(
"10"
:=
basesTotal
(
"10"
))
->:
(
"20"
:=
basesTotal
(
"20"
))
->:
(
"30"
:=
basesTotal
(
"30"
))
->:
(
"40"
:=
basesTotal
(
"40"
))
->:
(
"50"
:=
basesTotal
(
"50"
))
->:
(
"60"
:=
basesTotal
(
"60"
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
(
"reads"
:=
(
(
"num_with_n"
:=
readsTotal
(
"num_with_n"
))
->:
(
"num_total"
:=
readsTotal
(
"num_total"
))
->:
(
"len_min"
:=
readsTotal
(
"len_min"
))
->:
(
"len_max"
:=
readsTotal
(
"len_max"
))
->:
(
"num_mean_qual_gte"
:=
(
(
"1"
:=
readsTotal
(
"1"
))
->:
(
"10"
:=
readsTotal
(
"10"
))
->:
(
"20"
:=
readsTotal
(
"20"
))
->:
(
"30"
:=
readsTotal
(
"30"
))
->:
(
"40"
:=
readsTotal
(
"40"
))
->:
(
"50"
:=
readsTotal
(
"50"
))
->:
(
"60"
:=
readsTotal
(
"60"
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
(
"qual_encoding"
:=
encoding
.
head
)
->:
jEmptyObject
}
import
FqEncoding._
var
phredEncoding
:
FqEncoding.Value
=
Sanger
val
reportValues
=
List
(
1
,
10
,
20
,
30
,
40
,
50
,
60
)
// the base quality for each position on the reads
var
quals
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
var
nucs
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
// generate the baseHistogram and readHistogram
var
baseHistogram
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
var
readHistogram
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
()
var
nucleotideHistoMap
:
mutable.Map
[
Char
,
Long
]
=
mutable
.
Map
()
private
var
baseQualHistoMap
:
mutable.Map
[
Int
,
Long
]
=
mutable
.
Map
(
0
->
0
)
private
var
readQualHistoMap
:
mutable.Map
[
Int
,
Long
]
=
mutable
.
Map
(
0
->
0
)
case
class
Args
(
fastq
:
File
=
new
File
(
""
))
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
head
(
s
"""
|$commandName - Summarize FastQ
"""
.
stripMargin
)
opt
[
File
](
'i'
,
"fastq"
)
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
c
.
copy
(
fastq
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"FASTQ file not found"
)
}
text
"FastQ file to generate stats from"
}
/**
* Parses the command line argument
*
* @param args Array of arguments
* @return
*/
def
parseArgs
(
args
:
Array
[
String
])
:
Args
=
new
OptParser
()
.
parse
(
args
,
Args
())
.
getOrElse
(
sys
.
exit
(
1
))
/**
*
* @param quals Computed quality histogram [flat]
*/
def
detectPhredEncoding
(
quals
:
mutable.ArrayBuffer
[
Long
])
:
Unit
=
{
// substract 1 on high value, because we start from index 0
val
l_qual
=
quals
.
takeWhile
(
_
==
0
).
length
val
h_qual
=
quals
.
length
-
1
(
l_qual
<
59
,
h_qual
>
74
)
match
{
case
(
false
,
true
)
=>
{
phredEncoding
=
Solexa
}
case
(
true
,
true
)
=>
{
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the h_qual exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
phredEncoding
=
Solexa
}
case
(
true
,
false
)
=>
{
// this is definite a sanger sequence, the lower end is sanger only
phredEncoding
=
Sanger
}
case
(
_
,
_
)
=>
{
phredEncoding
=
Unknown
}
}
}
// Setting up the internal storage for the statistics gathered for each read
// 'nuc' are the nucleotides 'ACTGN', the max ASCII value for this is T, pre-init the ArrayBuffer to this value
// as we don't expect the have other 'higher' numbered Nucleotides for now.
case
class
BaseStat
(
qual
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
(),
nuc
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
.
fill
(
'T'
.
toInt
+
1
)(
0
))
case
class
ReadStat
(
qual
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
(),
nuc
:
mutable.ArrayBuffer
[
Long
]
=
mutable
.
ArrayBuffer
.
fill
(
'T'
.
toInt
+
1
)(
0
),
var
withN
:
Long
,
lengths
:
mutable.ArrayBuffer
[
Int
]
=
mutable
.
ArrayBuffer
())
val
baseStats
:
mutable.ArrayBuffer
[
BaseStat
]
=
mutable
.
ArrayBuffer
()
val
readStats
:
ReadStat
=
new
ReadStat
(
mutable
.
ArrayBuffer
(),
mutable
.
ArrayBuffer
.
fill
(
'T'
.
toInt
+
1
)(
0
),
0L
,
mutable
.
ArrayBuffer
())
/**
* Compute the quality metric per read
* Results are stored in baseStats and readStats
*
* @param record FastqRecord
*/
def
procesRead
(
record
:
FastqRecord
)
:
Unit
=
{
// Adjust/expand the length of baseStat case classes to the size of current
// read if the current list is not long enough to store the data
if
(
baseStats
.
length
<
record
.
length
)
{
baseStats
++=
mutable
.
ArrayBuffer
.
fill
(
record
.
length
-
baseStats
.
length
)(
BaseStat
())
}
if
(
readStats
.
lengths
.
length
<
record
.
length
)
{
readStats
.
lengths
++=
mutable
.
ArrayBuffer
.
fill
(
record
.
length
-
readStats
.
lengths
.
length
+
1
)(
0
)
}
val
readQual
=
record
.
getBaseQualityString
val
readNucleotides
=
record
.
getReadString
readStats
.
lengths
(
record
.
length
)
+=
1
for
(
t
<-
0
until
record
.
length
())
{
if
(
baseStats
(
t
).
qual
.
length
<=
readQual
(
t
))
{
baseStats
(
t
).
qual
++=
mutable
.
ArrayBuffer
.
fill
(
readQual
(
t
).
toInt
-
baseStats
(
t
).
qual
.
length
+
1
)(
0
)
}
baseStats
(
t
).
qual
(
readQual
(
t
))
+=
1
baseStats
(
t
).
nuc
(
readNucleotides
(
t
))
+=
1
readStats
.
nuc
(
readNucleotides
(
t
))
+=
1
}
// implicit conversion to Int using foldLeft(0)
val
avgQual
:
Int
=
(
readQual
.
sum
/
readQual
.
length
)
if
(
readStats
.
qual
.
length
<=
avgQual
)
{
readStats
.
qual
++=
mutable
.
ArrayBuffer
.
fill
(
avgQual
-
readStats
.
qual
.
length
+
1
)(
0
)
}
readStats
.
qual
(
avgQual
)
+=
1
readStats
.
withN
+=
{
if
(
readNucleotides
.
contains
(
"N"
))
1L
else
0L
}
}
/**
* seqStat, the compute entrypoint where all statistics collection starts
*
* @param fqreader FastqReader
* @return numReads - number of reads counted
*/
def
seqStat
(
fqreader
:
FastqReader
)
:
Long
=
{
var
numReads
:
Long
=
0
for
(
read
<-
fqreader
.
iterator
.
asScala
)
{
procesRead
(
read
)
numReads
+=
1
}
numReads
}
def
summarize
()
:
Unit
=
{
// for every position to the max length of any read
for
(
pos
<-
0
until
baseStats
.
length
)
{
// list all qualities at this particular position `pos`
// fix the length of `quals`
if
(
quals
.
length
<=
baseStats
(
pos
).
qual
.
length
)
{
quals
++=
mutable
.
ArrayBuffer
.
fill
(
baseStats
(
pos
).
qual
.
length
-
quals
.
length
)(
0
)
}
if
(
nucs
.
length
<=
baseStats
(
pos
).
nuc
.
length
)
{
for
(
_
<-
nucs
.
length
until
baseStats
(
pos
).
nuc
.
length
)
nucs
.
append
(
0
)
}
// count into the quals
baseStats
(
pos
).
qual
.
zipWithIndex
foreach
{
case
(
value
,
index
)
=>
quals
(
index
)
+=
value
}
// count N into nucs
baseStats
(
pos
).
nuc
.
zipWithIndex
foreach
{
case
(
value
,
index
)
=>
nucs
(
index
)
+=
value
}
}
detectPhredEncoding
(
quals
)
for
(
pos
<-
0
until
nucs
.
length
)
{
// always export the N-nucleotide
if
(
nucs
(
pos
)
>
0
||
pos
.
toChar
==
'N'
)
{
nucleotideHistoMap
+=
(
pos
.
toChar
->
nucs
(
pos
))
}
}
// init baseHistogram with the bounderies of the report values
for
(
pos
<-
0
until
reportValues
.
max
+
1
)
{
baseHistogram
.
append
(
0
)
readHistogram
.
append
(
0
)
}
for
(
pos
<-
0
until
quals
.
length
)
{
var
key
:
Int
=
pos
-
phredEncoding
.
id
if
(
key
>
0
)
{
// count till the max of baseHistogram.length
for
(
histokey
<-
0
until
key
+
1
)
{
baseHistogram
(
histokey
)
+=
quals
(
pos
)
}
}
}
for
(
pos
<-
0
until
baseHistogram
.
length
)
{
baseQualHistoMap
+=
(
pos
->
baseHistogram
(
pos
))
}
for
(
pos
<-
0
until
readStats
.
qual
.
length
)
{
var
key
:
Int
=
pos
-
phredEncoding
.
id
if
(
key
>
0
)
{
// count till the max of baseHistogram.length
for
(
histokey
<-
0
until
key
+
1
)
{
readHistogram
(
histokey
)
+=
readStats
.
qual
(
pos
)
}
}
}
for
(
pos
<-
0
until
readHistogram
.
length
)
{
readQualHistoMap
+=
(
pos
->
readHistogram
(
pos
))
}
}
def
main
(
args
:
Array
[
String
])
:
Unit
=
{
val
commandArgs
:
Args
=
parseArgs
(
args
)
logger
.
info
(
"Start seqstat"
)
val
reader
=
new
FastqReader
(
commandArgs
.
fastq
)
val
numReads
=
seqStat
(
reader
)
summarize
()
logger
.
debug
(
nucs
)
// logger.debug(baseStats)
logger
.
info
(
"Seqstat done"
)
val
report
:
Map
[
String
,
Any
]
=
Map
(
(
"files"
,
Map
(
(
"fastq"
,
Map
(
(
"path"
,
commandArgs
.
fastq
),
(
"checksum_sha1"
,
""
)
)
)
)
),
(
"stats"
,
Map
(
(
"bases"
,
Map
(
(
"num_n"
,
nucleotideHistoMap
(
'N'
)),
(
"num_total"
,
nucleotideHistoMap
.
values
.
sum
),
(
"num_qual_gte"
,
baseQualHistoMap
.
toMap
),
(
"nucleotides"
,
nucleotideHistoMap
.
toMap
)
)),
(
"reads"
,
Map
(
(
"num_with_n"
,
readStats
.
withN
),
(
"num_total"
,
readStats
.
qual
.
sum
),
(
"len_min"
,
readStats
.
lengths
.
takeWhile
(
_
==
0
).
length
),
(
"len_max"
,
readStats
.
lengths
.
length
-
1
),
(
"num_qual_gte"
,
readQualHistoMap
.
toMap
),
(
"qual_encoding"
,
phredEncoding
.
toString
.
toLowerCase
)
))
))
)
val
jsonReport
:
Json
=
{
ConfigUtils
.
mapToJson
(
report
)
}
println
(
jsonReport
.
spaces2
)
}
}
public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/SeqstatTest.scala
0 → 100644
View file @
3dc92c86
/**
* Copyright (c) 2015 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
* @author Wai Yi Leung <w.y.leung@lumc.nl>
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.nio.file.Paths
import
htsjdk.samtools.fastq.
{
FastqReader
,
FastqRecord
}
import
org.mockito.Mockito.
{
inOrder
=>
inOrd
,
when
}
import
org.scalatest.Matchers
import
org.scalatest.mock.MockitoSugar
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.
{
DataProvider
,
Test
}
import
scala.collection.JavaConverters._
class
SeqstatTest
extends
TestNGSuite
with
MockitoSugar
with
Matchers
{
import
nl.lumc.sasc.biopet.tools.Seqstat._
import
nl.lumc.sasc.biopet.tools.FqEncoding._
private
def
resourceFile
(
p
:
String
)
:
File
=
new
File
(
resourcePath
(
p
))
private
def
resourcePath
(
p
:
String
)
:
String
=
Paths
.
get
(
getClass
.
getResource
(
p
).
toURI
).
toString
// Helper functions to create iterator over FastqRecords given its IDs as Ints
// Record with 'A' and Qual==39 (SangerEncoding)
private
def
recordsOver
(
ids
:
String*
)
:
java.util.Iterator
[
FastqRecord
]
=
ids
.
map
(
x
=>
new
FastqRecord
(
x
,
"ACGTN"
,
""
,
"HIBC!"
))
.
toIterator
.
asJava
@DataProvider
(
name
=
"mockReaderProvider"
)
def
mockReaderProvider
()
=
Array
(
Array
(
mock
[
FastqReader
])
)
@Test
(
dataProvider
=
"mockReaderProvider"
,
groups
=
Array
(
"sanger"
),
singleThreaded
=
true
)
def
testDefault
(
fqMock
:
FastqReader
)
=
{
when
(
fqMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
)
}
@Test
(
dataProvider
=
"mockReaderProvider"
,
groups
=
Array
(
"read"
),
singleThreaded
=
true
)
def
testSeqCountReads
(
fqMock
:
FastqReader
)
=
{
when
(
fqMock
.
iterator
)
thenReturn
recordsOver
(
"1"
,
"2"
,
"3"
,
"4"
,
"5"
)
val
seqstat
=
Seqstat
val
numReads
=
seqstat
.
seqStat
(
fqMock
)
numReads
shouldBe
5
}
@Test
(
dataProvider
=
"mockReaderProvider"
,
groups
=
Array
(
"phredscore"
),
singleThreaded
=
true
,
dependsOnGroups
=
Array
(
"read"
))
def
testEncodingDetectionSanger
(
fqMock
:
FastqReader
)
=
{
val
seqstat
=
Seqstat
seqstat
.
summarize
()
seqstat
.
phredEncoding
shouldBe
Sanger
}
@Test
(
dataProvider
=
"mockReaderProvider"
,
groups
=
Array
(
"nucleocount"
),
singleThreaded
=
true
,
dependsOnGroups
=
Array
(
"phredscore"
))
def
testEncodingNucleotideCount
(
fqMock
:
FastqReader
)
=
{
val
seqstat
=
Seqstat
nucleotideHistoMap
(
'N'
)
shouldEqual
5
nucleotideHistoMap
(
'A'
)
shouldEqual
5
nucleotideHistoMap
(
'C'
)
shouldEqual
5
nucleotideHistoMap
(
'T'
)
shouldEqual
5
nucleotideHistoMap
(
'G'
)
shouldEqual
5
}
@Test
(
dataProvider
=
"mockReaderProvider"
,
groups
=
Array
(
"basehistogram"
),
singleThreaded
=
true
,
dependsOnGroups
=
Array
(
"nucleocount"
))
def
testEncodingBaseHistogram
(
fqMock
:
FastqReader
)
=
{
val
seqstat
=
Seqstat
baseHistogram
(
40
)
shouldEqual
5
baseHistogram
(
39
)
shouldEqual
10
baseHistogram
(
34
)
shouldEqual
15
baseHistogram
(
33
)
shouldEqual
20
baseHistogram
(
0
)
shouldEqual
20
}
@Test
def
testArgsMinimum
()
=
{
val
args
=
Array
(
"-i"
,
resourcePath
(
"/paired01a.fq"
))
val
parsed
=
parseArgs
(
args
)
parsed
.
fastq
shouldBe
resourceFile
(
"/paired01a.fq"
)
}
}
\ No newline at end of file
public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala
View file @
3dc92c86
...
...
@@ -49,6 +49,7 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl
.
lumc
.
sasc
.
biopet
.
tools
.
BastyGenerateFasta
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
MergeAlleles
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
SamplesTsvToJson
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
Seqstat
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VEPNormalizer
,
nl
.
lumc
.
sasc
.
biopet
.
tools
.
AnnotateVcfWithBed
)
}
public/carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala
View file @
3dc92c86
...
...
@@ -84,7 +84,6 @@ object CarpTest {
val
executables
=
Map
(
"reference"
->
"test"
,
"seqstat"
->
Map
(
"exe"
->
"test"
),
"fastqc"
->
Map
(
"exe"
->
"test"
),
"seqtk"
->
Map
(
"exe"
->
"test"
),
"sickle"
->
Map
(
"exe"
->
"test"
),
...
...
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
View file @
3dc92c86
...
...
@@ -20,7 +20,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.
{
Gzip
,
Pbzip2
,
Md5sum
,
Zcat
,
Seqstat
}
import
nl.lumc.sasc.biopet.extensions.
{
Gzip
,
Pbzip2
,
Md5sum
,
Zcat
}
import
nl.lumc.sasc.biopet.tools.Seqstat
import
nl.lumc.sasc.biopet.tools.FastqSync
class
Flexiprep
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
...
...
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala
View file @
3dc92c86
...
...
@@ -17,7 +17,8 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.
{
Md5sum
,
Seqstat
}
import
nl.lumc.sasc.biopet.extensions.
{
Md5sum
}
import
nl.lumc.sasc.biopet.tools.Seqstat
import
nl.lumc.sasc.biopet.tools.FastqSync
import
org.broadinstitute.gatk.queue.function.InProcessFunction
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
...
...
@@ -148,25 +149,25 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co
}
def
seqstatSummary
()
:
Option
[
Json
]
=
{
val
R1_chunks
=
for
((
key
,
value
)
<-
chunks
)
yield
value
.
seqstatR1
.
getS
ummary
val
R1_chunks
=
for
((
key
,
value
)
<-
chunks
)
yield
value
.
seqstatR1
.
s
ummary
val
R1
:
Json
=
Seqstat
.
mergeSummaries
(
R1_chunks
.
toList
)
val
R2
:
Option
[
Json
]
=
if
(!
flexiprep
.
paired
)
None
else
if
(
chunks
.
size
==
1
)
Option
(
chunks
.
head
.
_2
.
seqstatR2
.
getS
ummary
)
else
if
(
chunks
.
size
==
1
)
Option
(
chunks
.
head
.
_2
.
seqstatR2
.
s
ummary
)
else
{
val
s
=
for
((
key
,
value
)
<-
chunks
)
yield
value
.
seqstatR2
.
getS
ummary
val
s
=
for
((
key
,
value
)
<-
chunks
)
yield
value
.
seqstatR2
.
s
ummary
Option
(
Seqstat
.
mergeSummaries
(
s
.
toList
))
}
val
R1_proc
:
Option
[
Json
]
=
if
(
flexiprep
.
skipClip
&&
flexiprep
.
skipTrim
)
None