Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
d93b8076
Commit
d93b8076
authored
Feb 16, 2015
by
Wai Yi Leung
Browse files
Seqstat in Flexiprep
parent
23f42d6b
Changes
3
Hide whitespace changes
Inline
Side-by-side
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/Seqstat.scala
View file @
d93b8076
...
...
@@ -6,12 +6,12 @@
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
scala.annotation.tailrec
import
org.broadinstitute.gatk.utils.commandline.
{
Output
,
Input
}
import
scala.collection.JavaConverters._
import
scala.collection.JavaConversions._
import
scala.collection.mutable
import
scala.collection.
parallel.
mutable.
Par
Map
import
scala.
collection.mutable.
{
Map
=>
MutMap
}
import
scala.collection.
im
mutable.
{
Map
}
import
scala.
io.Source
import
scala.language.postfixOps
import
htsjdk.samtools.fastq.
{
FastqReader
,
FastqRecord
}
...
...
@@ -30,6 +30,23 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils
*/
class
Seqstat
(
val
root
:
Configurable
)
extends
BiopetJavaCommandLineFunction
{
javaMainClass
=
getClass
.
getName
@Input
(
doc
=
"Input FASTQ"
,
shortName
=
"input"
,
required
=
true
)
var
input
:
File
=
_
@Output
(
doc
=
"Output JSON"
,
shortName
=
"output"
,
required
=
true
)
var
output
:
File
=
_
override
val
defaultVmem
=
"4G"
memoryLimit
=
Option
(
3.0
)
override
def
commandLine
=
super
.
commandLine
+
required
(
"-i"
,
input
)
+
" > "
+
required
(
output
)
def
getSummary
:
Json
=
{
val
json
=
Parse
.
parseOption
(
Source
.
fromFile
(
output
).
mkString
)
if
(
json
.
isEmpty
)
return
jNull
else
return
json
.
get
.
fieldOrEmptyObject
(
"stats"
)
}
}
object
FqEncoding
extends
Enumeration
{
...
...
@@ -40,6 +57,80 @@ object FqEncoding extends Enumeration {
}
object
Seqstat
extends
ToolCommand
{
def
apply
(
root
:
Configurable
,
input
:
File
,
output
:
File
)
:
Seqstat
=
{
val
seqstat
=
new
Seqstat
(
root
)
seqstat
.
input
=
input
seqstat
.
output
=
output
return
seqstat
}
def
apply
(
root
:
Configurable
,
fastqfile
:
File
,
outDir
:
String
)
:
Seqstat
=
{
val
seqstat
=
new
Seqstat
(
root
)
val
ext
=
fastqfile
.
getName
.
substring
(
fastqfile
.
getName
.
lastIndexOf
(
"."
))
seqstat
.
input
=
fastqfile
seqstat
.
output
=
new
File
(
outDir
+
fastqfile
.
getName
.
substring
(
0
,
fastqfile
.
getName
.
lastIndexOf
(
"."
))
+
".seqstats.json"
)
return
seqstat
}
def
mergeSummaries
(
jsons
:
List
[
Json
])
:
Json
=
{
def
addJson
(
json
:
Json
,
total
:
mutable.Map
[
String
,
Long
])
{
for
(
key
<-
json
.
objectFieldsOrEmpty
)
{
if
(
json
.
field
(
key
).
get
.
isObject
)
addJson
(
json
.
field
(
key
).
get
,
total
)
else
if
(
json
.
field
(
key
).
get
.
isNumber
)
{
val
number
=
json
.
field
(
key
).
get
.
numberOrZero
.
toLong
if
(
total
.
contains
(
key
))
{
if
(
key
==
"len_min"
)
{
if
(
total
(
key
)
>
number
)
total
(
key
)
=
number
}
else
if
(
key
==
"len_max"
)
{
if
(
total
(
key
)
<
number
)
total
(
key
)
=
number
}
else
total
(
key
)
+=
number
}
else
total
+=
(
key
->
number
)
}
}
}
var
basesTotal
:
mutable.Map
[
String
,
Long
]
=
mutable
.
Map
()
var
readsTotal
:
mutable.Map
[
String
,
Long
]
=
mutable
.
Map
()
var
encoding
:
Set
[
Json
]
=
Set
()
for
(
json
<-
jsons
)
{
encoding
+=
json
.
fieldOrEmptyString
(
"qual_encoding"
)
val
bases
=
json
.
fieldOrEmptyObject
(
"bases"
)
addJson
(
bases
,
basesTotal
)
val
reads
=
json
.
fieldOrEmptyObject
(
"reads"
)
addJson
(
reads
,
readsTotal
)
}
return
(
"bases"
:=
(
(
"num_n"
:=
basesTotal
(
"num_n"
))
->:
(
"num_total"
:=
basesTotal
(
"num_total"
))
->:
(
"num_qual_gte"
:=
(
(
"1"
:=
basesTotal
(
"1"
))
->:
(
"10"
:=
basesTotal
(
"10"
))
->:
(
"20"
:=
basesTotal
(
"20"
))
->:
(
"30"
:=
basesTotal
(
"30"
))
->:
(
"40"
:=
basesTotal
(
"40"
))
->:
(
"50"
:=
basesTotal
(
"50"
))
->:
(
"60"
:=
basesTotal
(
"60"
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
(
"reads"
:=
(
(
"num_with_n"
:=
readsTotal
(
"num_with_n"
))
->:
(
"num_total"
:=
readsTotal
(
"num_total"
))
->:
(
"len_min"
:=
readsTotal
(
"len_min"
))
->:
(
"len_max"
:=
readsTotal
(
"len_max"
))
->:
(
"num_mean_qual_gte"
:=
(
(
"1"
:=
readsTotal
(
"1"
))
->:
(
"10"
:=
readsTotal
(
"10"
))
->:
(
"20"
:=
readsTotal
(
"20"
))
->:
(
"30"
:=
readsTotal
(
"30"
))
->:
(
"40"
:=
readsTotal
(
"40"
))
->:
(
"50"
:=
readsTotal
(
"50"
))
->:
(
"60"
:=
readsTotal
(
"60"
))
->:
jEmptyObject
))
->:
jEmptyObject
))
->:
(
"qual_encoding"
:=
encoding
.
head
)
->:
jEmptyObject
}
import
FqEncoding._
var
phredEncoding
:
FqEncoding.Value
=
Sanger
...
...
@@ -254,7 +345,7 @@ object Seqstat extends ToolCommand {
val
commandArgs
:
Args
=
parseArgs
(
args
)
logger
.
info
(
"Start"
)
logger
.
info
(
"Start
seqstat
"
)
val
reader
=
new
FastqReader
(
commandArgs
.
fastq
)
val
numReads
=
seqStat
(
reader
)
...
...
@@ -262,7 +353,7 @@ object Seqstat extends ToolCommand {
logger
.
debug
(
nucs
)
// logger.debug(baseStats)
logger
.
info
(
"
D
one"
)
logger
.
info
(
"
Seqstat d
one"
)
val
report
:
Map
[
String
,
Any
]
=
Map
(
(
"files"
,
...
...
@@ -292,7 +383,9 @@ object Seqstat extends ToolCommand {
))
)
val
jsonReport
:
Json
=
ConfigUtils
.
mapToJson
(
report
)
val
jsonReport
:
Json
=
{
ConfigUtils
.
mapToJson
(
report
)
}
println
(
jsonReport
.
spaces2
)
}
}
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala
View file @
d93b8076
...
...
@@ -20,7 +20,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.
{
Gzip
,
Pbzip2
,
Md5sum
,
Zcat
,
Seqstat
}
import
nl.lumc.sasc.biopet.extensions.
{
Gzip
,
Pbzip2
,
Md5sum
,
Zcat
}
import
nl.lumc.sasc.biopet.tools.Seqstat
import
nl.lumc.sasc.biopet.tools.FastqSync
class
Flexiprep
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
...
...
public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala
View file @
d93b8076
...
...
@@ -17,7 +17,8 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep
import
java.io.PrintWriter
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.extensions.
{
Md5sum
,
Seqstat
}
import
nl.lumc.sasc.biopet.extensions.
{
Md5sum
}
import
nl.lumc.sasc.biopet.tools.Seqstat
import
nl.lumc.sasc.biopet.tools.FastqSync
import
org.broadinstitute.gatk.queue.function.InProcessFunction
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment