Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
b7ae99c2
Commit
b7ae99c2
authored
Sep 10, 2015
by
Wai Yi Leung
Browse files
Merge branch 'tooltests' into 'develop'
Tooltests Fixes #188, #196 and #200 See merge request !224
parents
00ca8854
9c9edf13
Changes
57
Expand all
Hide whitespace changes
Inline
Side-by-side
docs/tools/SamplesTsvToJson.md
View file @
b7ae99c2
...
...
@@ -58,7 +58,7 @@ To get the above example out of the tool one should provide 2 TSV files as follo
----
| sample
s
| library | bam |
| sample
| library | bam |
| ------- | ------- | --------- |
|Sample_ID_1 |Lib_ID_1 |MyFirst.bam |
|Sample_ID_2 |Lib_ID_2 |MySecond.bam |
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BastyGenerateFasta.scala
View file @
b7ae99c2
...
...
@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.tools
import
java.io.
{
File
,
PrintWriter
}
import
htsjdk.samtools.SamReaderFactory
import
htsjdk.samtools.
{
SAMSequenceRecord
,
SamReaderFactory
}
import
htsjdk.samtools.reference.IndexedFastaSequenceFile
import
htsjdk.variant.variantcontext.VariantContext
import
htsjdk.variant.vcf.VCFFileReader
...
...
@@ -28,6 +28,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import
scala.collection.JavaConversions._
import
scala.collection.mutable.ListBuffer
import
scala.collection.parallel.ParMap
class
BastyGenerateFasta
(
val
root
:
Configurable
)
extends
ToolCommandFuntion
with
Reference
{
javaMainClass
=
getClass
.
getName
...
...
@@ -155,7 +156,7 @@ object BastyGenerateFasta extends ToolCommand {
}
}
protected
var
cmdArgs
:
Args
=
_
protected
implicit
var
cmdArgs
:
Args
=
_
private
val
chunkSize
=
100000
/**
...
...
@@ -165,11 +166,18 @@ object BastyGenerateFasta extends ToolCommand {
val
argsParser
=
new
OptParser
cmdArgs
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
sys
.
exit
(
1
)
if
(
cmdArgs
.
outputVariants
!=
null
)
writeVariantsOnly
()
if
(
cmdArgs
.
outputConsensus
!=
null
||
cmdArgs
.
outputConsensusVariants
!=
null
)
writeConsensus
()
if
(
cmdArgs
.
outputVariants
!=
null
)
{
writeVariantsOnly
()
}
if
(
cmdArgs
.
outputConsensus
!=
null
||
cmdArgs
.
outputConsensusVariants
!=
null
)
{
writeConsensus
()
}
//FIXME: what to do if outputcConsensus is set, but not outputConsensusVariants (and vice versa)?
}
protected
def
writeConsensus
()
{
//FIXME: preferably split this up in functions, so that they can be unit tested
val
referenceFile
=
new
IndexedFastaSequenceFile
(
cmdArgs
.
reference
)
val
referenceDict
=
referenceFile
.
getSequenceDictionary
...
...
@@ -253,7 +261,7 @@ object BastyGenerateFasta extends ToolCommand {
}
}
protected
def
writeVariantsOnly
()
{
protected
[
tools
]
def
writeVariantsOnly
()
{
val
writer
=
new
PrintWriter
(
cmdArgs
.
outputVariants
)
writer
.
println
(
">"
+
cmdArgs
.
outputName
)
val
vcfReader
=
new
VCFFileReader
(
cmdArgs
.
inputVcf
,
false
)
...
...
@@ -265,17 +273,34 @@ object BastyGenerateFasta extends ToolCommand {
vcfReader
.
close
()
}
protected
def
getMaxAllele
(
vcfRecord
:
VariantContext
)
:
String
=
{
// TODO: what does this do?
// Seems to me it finds the allele in a sample with the highest AD value
// if this allele is shorter than the largest allele, it will append '-' to the string
protected
[
tools
]
def
getMaxAllele
(
vcfRecord
:
VariantContext
)(
implicit
cmdArgs
:
Args
)
:
String
=
{
val
maxSize
=
getLongestAllele
(
vcfRecord
).
getBases
.
length
if
(
cmdArgs
.
sampleName
==
null
)
return
fillAllele
(
vcfRecord
.
getReference
.
getBaseString
,
maxSize
)
if
(
cmdArgs
.
sampleName
==
null
)
{
return
fillAllele
(
vcfRecord
.
getReference
.
getBaseString
,
maxSize
)
}
val
genotype
=
vcfRecord
.
getGenotype
(
cmdArgs
.
sampleName
)
if
(
genotype
==
null
)
return
fillAllele
(
""
,
maxSize
)
if
(
genotype
==
null
)
{
return
fillAllele
(
""
,
maxSize
)
}
val
AD
=
if
(
genotype
.
hasAD
)
genotype
.
getAD
else
Array
.
fill
(
vcfRecord
.
getAlleles
.
size
())(
cmdArgs
.
minAD
)
if
(
AD
==
null
)
return
fillAllele
(
""
,
maxSize
)
if
(
AD
==
null
)
{
return
fillAllele
(
""
,
maxSize
)
}
val
maxADid
=
AD
.
zipWithIndex
.
maxBy
(
_
.
_1
).
_2
if
(
AD
(
maxADid
)
<
cmdArgs
.
minAD
)
return
fillAllele
(
""
,
maxSize
)
if
(
AD
(
maxADid
)
<
cmdArgs
.
minAD
)
{
return
fillAllele
(
""
,
maxSize
)
}
fillAllele
(
vcfRecord
.
getAlleles
()(
maxADid
).
getBaseString
,
maxSize
)
}
}
\ No newline at end of file
}
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala
View file @
b7ae99c2
...
...
@@ -61,12 +61,18 @@ object BiopetFlagstat extends ToolCommand {
flagstat
}
case
class
Args
(
inputFile
:
File
=
null
,
summaryFile
:
Option
[
File
]
=
None
,
region
:
Option
[
String
]
=
None
)
extends
AbstractArgs
case
class
Args
(
inputFile
:
File
=
null
,
outputFile
:
Option
[
File
]
=
None
,
summaryFile
:
Option
[
File
]
=
None
,
region
:
Option
[
String
]
=
None
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'I'
,
"inputFile"
)
required
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputFile
=
x
)
}
text
"input bam file"
opt
[
File
](
'o'
,
"outputFile"
)
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFile
=
Some
(
x
))
}
text
"output file"
opt
[
File
](
's'
,
"summaryFile"
)
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
summaryFile
=
Some
(
x
))
}
text
"summary output file"
...
...
@@ -151,7 +157,14 @@ object BiopetFlagstat extends ToolCommand {
writer
.
close
()
}
println
(
flagstatCollector
.
report
)
commandArgs
.
outputFile
match
{
case
Some
(
file
)
=>
{
val
writer
=
new
PrintWriter
(
file
)
writer
.
println
(
flagstatCollector
.
report
)
writer
.
close
()
}
case
_
=>
println
(
flagstatCollector
.
report
)
}
}
class
FlagstatCollector
{
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala
View file @
b7ae99c2
...
...
@@ -116,7 +116,7 @@ object CheckAllelesVcfInBam extends ToolCommand {
}
val
counts
=
for
(
samRecord
<-
bamIter
if
!
filterRead
(
samRecord
))
{
checkAlles
(
samRecord
,
vcfRecord
)
match
{
checkAlle
le
s
(
samRecord
,
vcfRecord
)
match
{
case
Some
(
a
)
=>
if
(
countReports
(
sample
).
aCounts
.
contains
(
a
))
countReports
(
sample
).
aCounts
(
a
)
+=
1
else
countReports
(
sample
).
aCounts
+=
(
a
->
1
)
case
_
=>
countReports
(
sample
).
notFound
+=
1
...
...
@@ -142,7 +142,7 @@ object CheckAllelesVcfInBam extends ToolCommand {
writer
.
close
()
}
def
checkAlles
(
samRecord
:
SAMRecord
,
vcfRecord
:
VariantContext
)
:
Option
[
String
]
=
{
def
checkAlle
le
s
(
samRecord
:
SAMRecord
,
vcfRecord
:
VariantContext
)
:
Option
[
String
]
=
{
val
readStartPos
=
List
.
range
(
0
,
samRecord
.
getReadBases
.
length
)
.
find
(
x
=>
samRecord
.
getReferencePositionAtReadPosition
(
x
+
1
)
==
vcfRecord
.
getStart
)
getOrElse
{
return
None
}
val
readBases
=
samRecord
.
getReadBases
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala
View file @
b7ae99c2
...
...
@@ -55,10 +55,10 @@ object FastqSplitter extends ToolCommand {
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'I'
,
"inputFile"
)
required
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputFile
=
x
)
}
text
"
out is a required file property
"
}
text
"
Path to input file
"
opt
[
File
](
'o'
,
"output"
)
required
()
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFile
=
x
::
c
.
outputFile
)
}
text
"
out is a required file property
"
}
text
"
Path to output file
"
}
/**
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala
View file @
b7ae99c2
...
...
@@ -15,7 +15,7 @@
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.io.
{
PrintWriter
,
File
}
import
htsjdk.samtools.
{
QueryInterval
,
SAMRecord
,
SamReaderFactory
,
ValidationStringency
}
import
nl.lumc.sasc.biopet.core.ToolCommand
...
...
@@ -24,15 +24,20 @@ import scala.collection.JavaConversions._
import
scala.io.Source
object
FindRepeatsPacBio
extends
ToolCommand
{
case
class
Args
(
inputBam
:
File
=
null
,
inputBed
:
File
=
null
)
extends
AbstractArgs
case
class
Args
(
inputBam
:
File
=
null
,
outputFile
:
Option
[
File
]
=
None
,
inputBed
:
File
=
null
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'I'
,
"inputBam"
)
required
()
maxOccurs
1
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputBam
=
x
)
}
}
text
"Path to input file"
opt
[
File
](
'o'
,
"outputFile"
)
maxOccurs
1
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFile
=
Some
(
x
))
}
text
"Path to input file"
opt
[
File
](
'b'
,
"inputBed"
)
required
()
maxOccurs
1
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputBed
=
x
)
}
text
"
output file, default to stdout
"
}
text
"
Path to bed file
"
}
/**
...
...
@@ -50,7 +55,6 @@ object FindRepeatsPacBio extends ToolCommand {
val
header
=
List
(
"chr"
,
"startPos"
,
"stopPos"
,
"Repeat_seq"
,
"repeatLength"
,
"original_Repeat_readLength"
,
"Calculated_repeat_readLength"
,
"minLength"
,
"maxLength"
,
"inserts"
,
"deletions"
,
"notSpan"
)
println
(
header
.
mkString
(
"\t"
))
for
(
bedLine
<-
Source
.
fromFile
(
commandArgs
.
inputBed
).
getLines
();
...
...
@@ -84,9 +88,21 @@ object FindRepeatsPacBio extends ToolCommand {
if
(
length
<
minLength
||
minLength
==
-
1
)
minLength
=
length
}
}
println
(
List
(
chr
,
startPos
,
stopPos
,
typeRepeat
,
repeatLength
,
oriRepeatLength
,
calcRepeatLength
.
mkString
(
","
),
minLength
,
maxLength
,
inserts
.
mkString
(
"/"
),
deletions
.
mkString
(
"/"
),
notSpan
).
mkString
(
"\t"
))
bamIter
.
close
()
commandArgs
.
outputFile
match
{
case
Some
(
file
)
=>
{
val
writer
=
new
PrintWriter
(
file
)
writer
.
println
(
header
.
mkString
(
"\t"
))
writer
.
println
(
List
(
chr
,
startPos
,
stopPos
,
typeRepeat
,
repeatLength
,
oriRepeatLength
,
calcRepeatLength
.
mkString
(
","
),
minLength
,
maxLength
,
inserts
.
mkString
(
"/"
),
deletions
.
mkString
(
"/"
),
notSpan
).
mkString
(
"\t"
))
writer
.
close
()
}
case
_
=>
{
println
(
header
.
mkString
(
"\t"
))
println
(
List
(
chr
,
startPos
,
stopPos
,
typeRepeat
,
repeatLength
,
oriRepeatLength
,
calcRepeatLength
.
mkString
(
","
),
minLength
,
maxLength
,
inserts
.
mkString
(
"/"
),
deletions
.
mkString
(
"/"
),
notSpan
).
mkString
(
"\t"
))
}
}
}
}
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala
View file @
b7ae99c2
...
...
@@ -73,7 +73,7 @@ object SageCountFastq extends ToolCommand {
if
(
counts
.
contains
(
seq
))
counts
(
seq
)
+=
1
else
counts
+=
(
seq
->
1
)
count
+=
1
if
(
count
%
1000000
==
0
)
System
.
err
.
println
(
count
+
" sequences done"
)
if
(
count
%
1000000
==
0
)
logger
.
info
(
count
+
" sequences done"
)
}
})
logger
.
info
(
count
+
" sequences done"
)
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala
View file @
b7ae99c2
...
...
@@ -77,10 +77,10 @@ object SageCreateLibrary extends ToolCommand {
opt
[
Int
](
"length"
)
required
()
unbounded
()
action
{
(
x
,
c
)
=>
c
.
copy
(
length
=
x
)
}
opt
[
File
](
"noTagsOutput"
)
required
()
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
opt
[
File
](
"noTagsOutput"
)
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
noTagsOutput
=
x
)
}
opt
[
File
](
"noAntiTagsOutput"
)
required
()
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
opt
[
File
](
"noAntiTagsOutput"
)
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
noAntiTagsOutput
=
x
)
}
opt
[
File
](
"allGenesOutput"
)
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
...
...
@@ -88,8 +88,7 @@ object SageCreateLibrary extends ToolCommand {
}
}
var
tagRegex
:
Regex
=
null
var
geneRegex
=
"""ENSG[0-9]{11}"""
.
r
val
geneRegex
=
"""ENSG[0-9]{11}"""
.
r
val
tagGenesMap
:
mutable.Map
[
String
,
TagGenes
]
=
mutable
.
Map
()
...
...
@@ -114,23 +113,24 @@ object SageCreateLibrary extends ToolCommand {
if
(!
commandArgs
.
input
.
exists
)
throw
new
IllegalStateException
(
"Input file not found, file: "
+
commandArgs
.
input
)
tagRegex
=
(
commandArgs
.
tag
+
"[CATG]{"
+
commandArgs
.
length
+
"}"
).
r
val
tagRegex
=
(
commandArgs
.
tag
+
"[CATG]{"
+
commandArgs
.
length
+
"}"
).
r
var
count
=
0
System
.
err
.
println
(
"Reading fasta file"
)
logger
.
info
(
"Reading fasta file"
)
val
reader
=
FastaReaderHelper
.
readFastaDNASequence
(
commandArgs
.
input
)
System
.
err
.
println
(
"Finding tags"
)
logger
.
info
(
"Finding tags"
)
for
((
name
,
seq
)
<-
reader
)
{
getTags
(
name
,
seq
)
val
result
=
getTags
(
name
,
seq
,
tagRegex
)
addTagresultToTaglib
(
name
,
result
)
count
+=
1
if
(
count
%
10000
==
0
)
System
.
err
.
println
(
count
+
" transcripts done"
)
if
(
count
%
10000
==
0
)
logger
.
info
(
count
+
" transcripts done"
)
}
System
.
err
.
println
(
count
+
" transcripts done"
)
logger
.
info
(
count
+
" transcripts done"
)
System
.
err
.
println
(
"Start sorting tags"
)
logger
.
info
(
"Start sorting tags"
)
val
tagGenesMapSorted
:
SortedMap
[
String
,
TagGenes
]
=
SortedMap
(
tagGenesMap
.
toArray
:
_
*
)
System
.
err
.
println
(
"Writting output files"
)
logger
.
info
(
"Writting output files"
)
val
writer
=
new
PrintWriter
(
commandArgs
.
output
)
writer
.
println
(
"#tag\tfirstTag\tAllTags\tFirstAntiTag\tAllAntiTags"
)
for
((
tag
,
genes
)
<-
tagGenesMapSorted
)
{
...
...
@@ -167,7 +167,7 @@ object SageCreateLibrary extends ToolCommand {
}
}
def
addTagresultToTaglib
(
name
:
String
,
tagResult
:
TagResult
)
{
private
def
addTagresultToTaglib
(
name
:
String
,
tagResult
:
TagResult
)
{
val
id
=
name
.
split
(
" "
).
head
//.stripPrefix("hg19_ensGene_")
val
geneID
=
geneRegex
.
findFirstIn
(
name
).
getOrElse
(
"unknown_gene"
)
allGenes
.
add
(
geneID
)
...
...
@@ -195,15 +195,13 @@ object SageCreateLibrary extends ToolCommand {
}
}
def
getTags
(
name
:
String
,
seq
:
DNASequence
)
:
TagResult
=
{
def
getTags
(
name
:
String
,
seq
:
DNASequence
,
tagRegex
:
Regex
)
:
TagResult
=
{
val
allTags
:
List
[
String
]
=
for
(
tag
<-
tagRegex
.
findAllMatchIn
(
seq
.
getSequenceAsString
).
toList
)
yield
tag
.
toString
()
val
firstTag
=
if
(
allTags
.
isEmpty
)
null
else
allTags
.
last
val
allAntiTags
:
List
[
String
]
=
for
(
tag
<-
tagRegex
.
findAllMatchIn
(
seq
.
getReverseComplement
.
getSequenceAsString
).
toList
)
yield
tag
.
toString
()
val
firstAntiTag
=
if
(
allAntiTags
.
isEmpty
)
null
else
allAntiTags
.
head
val
result
=
new
TagResult
(
firstTag
,
allTags
,
firstAntiTag
,
allAntiTags
)
addTagresultToTaglib
(
name
,
result
)
result
}
}
\ No newline at end of file
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala
View file @
b7ae99c2
...
...
@@ -37,7 +37,7 @@ class SageCreateTagCounts(val root: Configurable) extends ToolCommandFuntion {
@Output
(
doc
=
"Sense count file"
,
shortName
=
"sense"
,
required
=
true
)
var
countSense
:
File
=
_
@Output
(
doc
=
"Sense all coun file
t
"
,
shortName
=
"allsense"
,
required
=
true
)
@Output
(
doc
=
"Sense all coun
t
file"
,
shortName
=
"allsense"
,
required
=
true
)
var
countAllSense
:
File
=
_
@Output
(
doc
=
"AntiSense count file"
,
shortName
=
"antisense"
,
required
=
true
)
...
...
@@ -148,9 +148,18 @@ object SageCreateTagCounts extends ToolCommand {
writer
.
close
()
}
}
writeFile
(
commandArgs
.
countSense
,
senseCounts
)
writeFile
(
commandArgs
.
countAllSense
,
allSenseCounts
)
writeFile
(
commandArgs
.
countAntiSense
,
antiSenseCounts
)
writeFile
(
commandArgs
.
countAllAntiSense
,
allAntiSenseCounts
)
if
(
commandArgs
.
countSense
!=
null
)
{
writeFile
(
commandArgs
.
countSense
,
senseCounts
)
}
if
(
commandArgs
.
countAllAntiSense
!=
null
)
{
writeFile
(
commandArgs
.
countAllAntiSense
,
allAntiSenseCounts
)
}
if
(
commandArgs
.
countAllSense
!=
null
)
{
writeFile
(
commandArgs
.
countAllSense
,
allSenseCounts
)
}
if
(
commandArgs
.
countAntiSense
!=
null
)
{
writeFile
(
commandArgs
.
countAntiSense
,
antiSenseCounts
)
}
}
}
\ No newline at end of file
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala
View file @
b7ae99c2
...
...
@@ -15,7 +15,7 @@
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.io.
{
PrintWriter
,
File
}
import
nl.lumc.sasc.biopet.core.ToolCommand
import
nl.lumc.sasc.biopet.utils.ConfigUtils._
...
...
@@ -27,12 +27,15 @@ import scala.io.Source
* This tool can convert a tsv to a json file
*/
object
SamplesTsvToJson
extends
ToolCommand
{
case
class
Args
(
inputFiles
:
List
[
File
]
=
Nil
)
extends
AbstractArgs
case
class
Args
(
inputFiles
:
List
[
File
]
=
Nil
,
outputFile
:
Option
[
File
]
=
None
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
opt
[
File
](
'i'
,
"inputFiles"
)
required
()
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
inputFiles
=
x
::
c
.
inputFiles
)
}
text
"Input must be a tsv file, first line is seen as header and must at least have a 'sample' column, 'library' column is optional, multiple files allowed"
opt
[
File
](
'o'
,
"outputFile"
)
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFile
=
Some
(
x
))
}
}
/** Executes SamplesTsvToJson */
...
...
@@ -40,41 +43,53 @@ object SamplesTsvToJson extends ToolCommand {
val
argsParser
=
new
OptParser
val
commandArgs
:
Args
=
argsParser
.
parse
(
args
,
Args
())
getOrElse
sys
.
exit
(
1
)
val
fileMaps
=
for
(
inputFile
<-
commandArgs
.
inputFiles
)
yield
{
val
reader
=
Source
.
fromFile
(
inputFile
)
val
lines
=
reader
.
getLines
().
toList
.
filter
(!
_
.
isEmpty
)
val
header
=
lines
.
head
.
split
(
"\t"
)
val
sampleColumn
=
header
.
indexOf
(
"sample"
)
val
libraryColumn
=
header
.
indexOf
(
"library"
)
if
(
sampleColumn
==
-
1
)
throw
new
IllegalStateException
(
"Sample column does not exist in: "
+
inputFile
)
val
jsonString
=
stringFromInputs
(
commandArgs
.
inputFiles
)
commandArgs
.
outputFile
match
{
case
Some
(
file
)
=>
{
val
writer
=
new
PrintWriter
(
file
)
writer
.
println
(
jsonString
)
writer
.
close
()
}
case
_
=>
println
(
jsonString
)
}
}
val
sampleLibCache
:
mutable.Set
[(
String
,
Option
[
String
])]
=
mutable
.
Set
()
def
mapFromFile
(
inputFile
:
File
)
:
Map
[
String
,
Any
]
=
{
val
reader
=
Source
.
fromFile
(
inputFile
)
val
lines
=
reader
.
getLines
().
toList
.
filter
(!
_
.
isEmpty
)
val
header
=
lines
.
head
.
split
(
"\t"
)
val
sampleColumn
=
header
.
indexOf
(
"sample"
)
val
libraryColumn
=
header
.
indexOf
(
"library"
)
if
(
sampleColumn
==
-
1
)
throw
new
IllegalStateException
(
"Sample column does not exist in: "
+
inputFile
)
val
librariesValues
:
List
[
Map
[
String
,
Any
]]
=
for
(
tsvLine
<-
lines
.
tail
)
yield
{
val
values
=
tsvLine
.
split
(
"\t"
)
require
(
header
.
length
==
values
.
length
,
"Number of columns is not the same as the header"
)
val
sample
=
values
(
sampleColumn
)
val
library
=
if
(
libraryColumn
!=
-
1
)
Some
(
values
(
libraryColumn
))
else
None
val
sampleLibCache
:
mutable.Set
[(
String
,
Option
[
String
])]
=
mutable
.
Set
()
//FIXME: this is a workaround, should be removed after fixing #180
if
(
sample
.
head
.
isDigit
||
library
.
forall
(
_
.
head
.
isDigit
))
throw
new
IllegalStateException
(
"Sample or library may not start with a number"
)
val
librariesValues
:
List
[
Map
[
String
,
Any
]]
=
for
(
tsvLine
<-
lines
.
tail
)
yield
{
val
values
=
tsvLine
.
split
(
"\t"
)
require
(
header
.
length
==
values
.
length
,
"Number of columns is not the same as the header"
)
val
sample
=
values
(
sampleColumn
)
val
library
=
if
(
libraryColumn
!=
-
1
)
Some
(
values
(
libraryColumn
))
else
None
if
(
sampleLibCache
.
contains
((
sample
,
library
)))
throw
new
IllegalStateException
(
s
"Combination of $sample and $library is found multiple times"
)
else
sampleLibCache
.
add
((
sample
,
library
))
val
valuesMap
=
(
for
(
t
<-
0
until
values
.
size
if
!
values
(
t
).
isEmpty
&&
t
!=
sampleColumn
&&
t
!=
libraryColumn
)
yield
header
(
t
)
->
values
(
t
)).
toMap
library
match
{
case
Some
(
lib
)
=>
Map
(
"samples"
->
Map
(
sample
->
Map
(
"libraries"
->
Map
(
library
->
valuesMap
))))
case
_
=>
Map
(
"samples"
->
Map
(
sample
->
valuesMap
))
}
//FIXME: this is a workaround, should be removed after fixing #180
if
(
sample
.
head
.
isDigit
||
library
.
forall
(
_
.
head
.
isDigit
))
throw
new
IllegalStateException
(
"Sample or library may not start with a number"
)
if
(
sampleLibCache
.
contains
((
sample
,
library
)))
throw
new
IllegalStateException
(
s
"Combination of $sample ${library.map("
and
" + _).getOrElse("")} is found multiple times"
)
else
sampleLibCache
.
add
((
sample
,
library
))
val
valuesMap
=
(
for
(
t
<-
0
until
values
.
size
if
!
values
(
t
).
isEmpty
&&
t
!=
sampleColumn
&&
t
!=
libraryColumn
)
yield
header
(
t
)
->
values
(
t
)).
toMap
library
match
{
case
Some
(
lib
)
=>
Map
(
"samples"
->
Map
(
sample
->
Map
(
"libraries"
->
Map
(
lib
->
valuesMap
))))
case
_
=>
Map
(
"samples"
->
Map
(
sample
->
valuesMap
))
}
librariesValues
.
foldLeft
(
Map
[
String
,
Any
]())((
acc
,
kv
)
=>
mergeMaps
(
acc
,
kv
))
}
val
map
=
fileMaps
.
foldLeft
(
Map
[
String
,
Any
]())((
acc
,
kv
)
=>
mergeMaps
(
acc
,
kv
))
val
json
=
mapToJson
(
map
)
println
(
json
.
spaces2
)
librariesValues
.
foldLeft
(
Map
[
String
,
Any
]())((
acc
,
kv
)
=>
mergeMaps
(
acc
,
kv
))
}
def
stringFromInputs
(
inputs
:
List
[
File
])
:
String
=
{
val
map
=
inputs
.
map
(
f
=>
mapFromFile
(
f
)).
foldLeft
(
Map
[
String
,
Any
]())((
acc
,
kv
)
=>
mergeMaps
(
acc
,
kv
))
mapToJson
(
map
).
spaces2
}
}
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala
View file @
b7ae99c2
...
...
@@ -15,7 +15,7 @@
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.io.
{
PrintWriter
,
File
}
import
htsjdk.samtools.fastq.
{
FastqReader
,
FastqRecord
}
import
nl.lumc.sasc.biopet.core.config.Configurable
...
...
@@ -45,7 +45,7 @@ class SeqStat(val root: Configurable) extends ToolCommandFuntion with Summarizab
override
def
defaultCoreMemory
=
2.5
override
def
commandLine
=
super
.
commandLine
+
required
(
"-i"
,
input
)
+
" > "
+
required
(
output
)
override
def
commandLine
=
super
.
commandLine
+
required
(
"-i"
,
input
)
+
required
(
"-o"
,
output
)
def
summaryStats
:
Map
[
String
,
Any
]
=
{
val
map
=
ConfigUtils
.
fileToConfigMap
(
output
)
...
...
@@ -108,7 +108,7 @@ object SeqStat extends ToolCommand {
private
var
baseQualHistoMap
:
mutable.Map
[
Int
,
Long
]
=
mutable
.
Map
(
0
->
0
)
private
var
readQualHistoMap
:
mutable.Map
[
Int
,
Long
]
=
mutable
.
Map
(
0
->
0
)
case
class
Args
(
fastq
:
File
=
n
ew
File
(
""
)
)
extends
AbstractArgs
case
class
Args
(
fastq
:
File
=
n
ull
,
outputJson
:
Option
[
File
]
=
None
)
extends
AbstractArgs
class
OptParser
extends
AbstractOptParser
{
...
...
@@ -117,11 +117,14 @@ object SeqStat extends ToolCommand {
|$commandName - Summarize FastQ
"""
.
stripMargin
)
opt
[
File
](
'i'
,
"fastq"
)
required
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
opt
[
File
](
'i'
,
"fastq"
)
required
()
unbounded
()
valueName
"<fastq>"
action
{
(
x
,
c
)
=>
c
.
copy
(
fastq
=
x
)
}
validate
{
x
=>
if
(
x
.
exists
)
success
else
failure
(
"FASTQ file not found"
)
}
text
"FastQ file to generate stats from"
opt
[
File
](
'o'
,
"output"
)
unbounded
()
valueName
"<json>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputJson
=
Some
(
x
))
}
text
"File to write output to, if not supplied output go to stdout"
}
/**
...
...
@@ -317,6 +320,13 @@ object SeqStat extends ToolCommand {
))
)
println
(
ConfigUtils
.
mapToJson
(
report
))
commandArgs
.
outputJson
match
{
case
Some
(
file
)
=>
{
val
writer
=
new
PrintWriter
(
file
)
writer
.
println
(
ConfigUtils
.
mapToJson
(
report
))
writer
.
close
()
}
case
_
=>
println
(
ConfigUtils
.
mapToJson
(
report
))
}
}
}
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SummaryToTsv.scala
View file @
b7ae99c2
...
...
@@ -15,7 +15,8 @@
*/
package
nl.lumc.sasc.biopet.tools
import
java.io.File
import
java.io.
{
PrintWriter
,
File
}
import
java.nio.file.Paths
import
nl.lumc.sasc.biopet.core.ToolCommand
import
nl.lumc.sasc.biopet.core.summary.Summary
...
...
@@ -35,15 +36,27 @@ object SummaryToTsv extends ToolCommand {
opt
[
File
](
's'
,
"summary"
)
required
()
unbounded
()
maxOccurs
1
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
summary
=
x
)
}
opt
[
File
](
'o'
,
"output
"
)
maxOccurs
1
unbounded
()
valueName
"<file>"
action
{
(
x
,
c
)
=>
opt
[
File
](
'o'
,
"output
File"
)
unbounded
()
maxOccurs
1
valueName
"<file>"
action
{
(
x
,
c
)
=>
c
.
copy
(
outputFile
=
Some
(
x
))
}
opt
[
String
](
'p'
,
"path"
)
required
()
unbounded
()
valueName
"<
value
>"
action
{
(
x
,
c
)
=>
opt
[
String
](
'p'
,
"path"
)
required
()
unbounded
()
valueName
"<
string
>"
action
{
(
x
,
c
)
=>
c
.
copy
(
values
=
c
.
values
:::
x
::
Nil
)
}