Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
c1913ad8
Commit
c1913ad8
authored
May 11, 2016
by
Wai Yi Leung
Browse files
Merge branch 'develop' into fix-make-rc_trimmingoptional
parents
6104d68e
bffd796d
Changes
31
Hide whitespace changes
Inline
Side-by-side
bam2wig/pom.xml
View file @
c1913ad8
...
...
@@ -45,6 +45,18 @@
<artifactId>
BiopetExtensions
</artifactId>
<version>
${project.version}
</version>
</dependency>
<dependency>
<groupId>
org.scalatest
</groupId>
<artifactId>
scalatest_2.10
</artifactId>
<version>
2.2.1
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.testng
</groupId>
<artifactId>
testng
</artifactId>
<version>
6.8
</version>
<scope>
test
</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
bam2wig/src/test/resources/empty.bam
0 → 100644
View file @
c1913ad8
File added
bam2wig/src/test/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/BamToChromSizesTest.scala
0 → 100644
View file @
c1913ad8
package
nl.lumc.sasc.biopet.pipelines.bamtobigwig
import
java.io.File
import
java.nio.file.Paths
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
scala.io.Source
/**
 * Runs [[BamToChromSizes]] on the bundled `/empty.bam` test resource and checks the
 * chromosome-sizes file it writes.
 *
 * Created by pjvanthof on 09/05/16.
 */
class BamToChromSizesTest extends TestNGSuite with Matchers {

  /**
   * Resolves a classpath resource to a file-system path.
   *
   * @param p resource name as accepted by `Class.getResource`, e.g. "/empty.bam"
   * @return string form of the path behind the resource URL
   */
  private def resourcePath(p: String): String = {
    Paths.get(getClass.getResource(p).toURI).toString
  }

  /** Expects exactly the two lines "chrQ\t10000" and "chrR\t10000" in the written sizes file. */
  @Test
  def testChromSizes: Unit = {
    val bamFile = new File(resourcePath("/empty.bam"))

    val bamToChromSizes = new BamToChromSizes(null)
    bamToChromSizes.bamFile = bamFile
    bamToChromSizes.chromSizesFile = File.createTempFile("chrom.", ".sizes")
    bamToChromSizes.chromSizesFile.deleteOnExit()
    bamToChromSizes.run()

    // Close the reader even when the assertion fails, so the handle on the temp file is released.
    val sizes = Source.fromFile(bamToChromSizes.chromSizesFile)
    try {
      sizes.getLines().toList shouldBe List("chrQ\t10000", "chrR\t10000")
    } finally {
      sizes.close()
    }
  }
}
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala
View file @
c1913ad8
...
...
@@ -30,18 +30,16 @@ import scala.collection.mutable.ListBuffer
*/
object
WriteDependencies
extends
Logging
with
Configurable
{
val
root
:
Configurable
=
null
private
val
functionNames
:
mutable.Map
[
QFunction
,
String
]
=
mutable
.
Map
()
private
def
createFunctionNames
(
functions
:
Seq
[
QFunction
])
:
Unit
=
{
private
def
createFunctionNames
(
functions
:
Seq
[
QFunction
])
:
Map
[
QFunction
,
String
]
=
{
val
cache
:
mutable.Map
[
String
,
Int
]
=
mutable
.
Map
()
for
(
function
<-
functions
)
{
(
for
(
function
<-
functions
)
yield
{
val
baseName
=
function
match
{
case
f
:
Configurable
=>
f
.
configNamespace
case
f
=>
f
.
getClass
.
getSimpleName
}
cache
+=
baseName
->
(
cache
.
getOrElse
(
baseName
,
0
)
+
1
)
functionNames
+=
function
->
s
"$baseName-${cache(baseName)}"
}
function
->
s
"$baseName-${cache(baseName)}"
}
).
toMap
}
/**
...
...
@@ -55,7 +53,7 @@ object WriteDependencies extends Logging with Configurable {
val
errorOnMissingInput
:
Boolean
=
config
(
"error_on_missing_input"
,
false
)
createFunctionNames
(
functions
)
val
functionNames
=
createFunctionNames
(
functions
)
case
class
QueueFile
(
file
:
File
)
{
private
val
inputJobs
:
ListBuffer
[
QFunction
]
=
ListBuffer
()
...
...
biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala
0 → 100644
View file @
c1913ad8
package
nl.lumc.sasc.biopet.core
import
java.io.File
import
java.nio.file.Files
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
org.broadinstitute.gatk.queue.function.QFunction
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
scala.io.Source
/**
 * Checks the JSON written by `WriteDependencies.writeDependencies` for a chain of two jobs:
 * the first reads file1 and writes file2, the second reads file2 and writes file3.
 *
 * Created by pjvanthof on 09/05/16.
 */
class WriteDependenciesTest extends TestNGSuite with Matchers {

  import WriteDependenciesTest._

  /**
   * Minimal QFunction stub whose inputs and outputs are fixed at construction.
   *
   * The done/fail marker files are hidden siblings of each output (".NAME.done" / ".NAME.fail"),
   * and the job output file is the first output with ".out" appended.
   *
   * @param in  files reported as inputs
   * @param out files reported as outputs; must be non-empty because its head names the job output
   */
  case class Qfunc(in: Seq[File], out: Seq[File]) extends QFunction {
    override def inputs = in
    override def outputs = out
    override def doneOutputs = out.map(o => new File(o.getParentFile, s".${o.getName}.done"))
    override def failOutputs = out.map(o => new File(o.getParentFile, s".${o.getName}.fail"))
    jobOutputFile = new File(s"${out.head}.out")
  }

  /** Writes the dependency file for the two-job chain and inspects its "jobs" and "files" sections. */
  @Test
  def testDeps: Unit = {
    val outputFile = File.createTempFile("deps.", ".json")
    outputFile.deleteOnExit()

    val producer = Qfunc(List(file1), List(file2))
    val consumer = Qfunc(List(file2), List(file3))
    WriteDependencies.writeDependencies(List(producer, consumer), outputFile)

    val deps = ConfigUtils.fileToConfigMap(outputFile)

    // Both stub jobs must show up under a key that carries the stub's name.
    deps("jobs") shouldBe a[Map[_, _]]
    val jobs = deps("jobs").asInstanceOf[Map[String, Map[String, Any]]]
    jobs.keys.count(_.contains("Qfunc")) shouldBe 2

    // Every file touched by the chain must be listed by path.
    deps("files") shouldBe a[List[_]]
    val files = deps("files").asInstanceOf[List[Map[String, Any]]]
    val paths = files.flatMap(_.get("path"))
    assert(paths.contains(file1.toString))
    assert(paths.contains(file2.toString))
    assert(paths.contains(file3.toString))

    // Looks up the "pipeline_input" entry of the record whose "path" equals the given file.
    def pipelineInputFlag(file: File): Option[Any] =
      files.find(_.get("path") == Some(file.toString)).flatMap(_.get("pipeline_input"))

    // Only file1 is not produced by any job in the chain.
    pipelineInputFlag(file1) shouldBe Some(true)
    pipelineInputFlag(file2) shouldBe Some(false)
    pipelineInputFlag(file3) shouldBe Some(false)
  }
}
/**
 * Shared fixture for the test class: one temporary directory and three file paths inside it.
 * Only the directory is created; the three files are plain path objects.
 */
object WriteDependenciesTest {
  val tempDir = {
    val dir = Files.createTempDirectory("test").toFile
    dir.deleteOnExit()
    dir
  }

  val file1 = new File(tempDir, "file1.txt")
  val file2 = new File(tempDir, "file2.txt")
  val file3 = new File(tempDir, "file3.txt")
}
\ No newline at end of file
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
View file @
c1913ad8
...
...
@@ -24,6 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import
scala.collection.mutable
import
scala.io.Source
import
scala.util.matching.Regex
/**
* Extension for cutadapt
...
...
@@ -163,6 +164,51 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
(
if
(
outputAsStsout
)
""
else
required
(
"--output"
,
fastqOutput
)
+
" > "
+
required
(
statsOutput
))
/**
 * Parses the per-adapter sections of a cutadapt report.
 *
 * The report text is split on "=== Adapter N ===" headers; every section that contains
 * "Sequence" yields one entry. Within a section the last line matching the adapter pattern
 * supplies the sequence and trim count (empty string and 0 when no line matches), and every
 * "Overview of removed sequences " part that contains "length" supplies a length -> count
 * histogram, labelled "5p" when that part mentions 5' and "3p" otherwise.
 *
 * @param statsOutput cutadapt report file to read
 * @return adapter sequence -> Map("count" -> Int, "histogram" -> Map(side -> Map(length -> count)))
 */
def extractClippedAdapters(statsOutput: File): Map[String, Any] = {
  val histoCountRow: Regex = """([\d]+)\t([\d]+)\t.*""".r
  // NOTE(review): inside a character class `|` is a literal pipe, not alternation; harmless for
  // real sequences, but `[CTAG]+` was probably intended.
  val adapterR = """Sequence: ([C|T|A|G]+);.*Trimmed: ([\d]+) times\.""".r

  // Read the whole report up front and release the file handle even if reading fails.
  val statsFile = Source.fromFile(statsOutput)
  val adapterRawStats: Array[String] =
    try {
      statsFile.mkString
        .split("=== Adapter [\\d]+ ===")
        .filter(_.contains("Sequence"))
    } finally {
      statsFile.close()
    }

  adapterRawStats.map { adapterBlock =>
    // identify the adapter name and count; when several lines match, the last one wins
    val (adapterName, adapterCount) = adapterBlock.split("\n")
      .collect { case adapterR(sequence, count) => (sequence, count.toInt) }
      .lastOption
      .getOrElse(("", 0))

    // parse the block that gives the histogram of clipped bases and from which end
    val counts = adapterBlock.split("Overview of removed sequences ")
      .filter(_.contains("length"))
      .map { clipSideRawStats =>
        val clipSideLabel = if (clipSideRawStats.contains("5'")) "5p" else "3p"
        val histogramValues = clipSideRawStats.split("\n").collect {
          case histoCountRow(length, count) => length.toInt -> count.toInt
        }
        clipSideLabel -> histogramValues.toMap
      }

    adapterName -> Map("count" -> adapterCount, "histogram" -> counts.toMap)
  }.toMap // one entry per adapter sequence; a repeated sequence keeps its last section
}
/** Output summary stats */
def
summaryStats
:
Map
[
String
,
Any
]
=
{
/**
...
...
@@ -177,7 +223,6 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
val
tooLongR
=
""".* that were too long: *([,\d]+) .*"""
.
r
val
tooManyN
=
""".* with too many N: *([,\d]+) .*"""
.
r
val
adapterR
=
"""Sequence ([C|T|A|G]*);.*Trimmed: ([,\d]+) times."""
.
r
val
basePairsProcessed
=
"""Total basepairs processed: *([,\d]+) bp"""
.
r
val
basePairsWritten
=
"""Total written \(filtered\): *([,\d]+) bp .*"""
.
r
...
...
@@ -192,24 +237,28 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
"bpoutput"
->
0
,
"toomanyn"
->
0
)
val
adapterStats
:
mutable.Map
[
String
,
Long
]
=
mutable
.
Map
()
// extract the adapters with its histogram
val
adapterStats
=
if
(
statsOutput
.
exists
)
{
extractClippedAdapters
(
statsOutput
)
}
else
Map
.
empty
if
(
statsOutput
.
exists
)
{
val
statsFile
=
Source
.
fromFile
(
statsOutput
)
for
(
line
<-
statsFile
.
getLines
())
{
line
match
{
case
processedReads
(
m
)
=>
stats
(
"processed"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
withAdapters
(
m
)
=>
stats
(
"withadapters"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
readsPassingFilters
(
m
)
=>
stats
(
"passingfilters"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
tooShortR
(
m
)
=>
stats
(
"tooshort"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
tooLongR
(
m
)
=>
stats
(
"toolong"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
tooManyN
(
m
)
=>
stats
(
"toomanyn"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
basePairsProcessed
(
m
)
=>
stats
(
"bpinput"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
basePairsWritten
(
m
)
=>
stats
(
"bpoutput"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
adapterR
(
adapter
,
count
)
=>
adapterStats
+=
(
adapter
->
count
.
toLong
)
case
_
=>
case
processedReads
(
m
)
=>
stats
(
"processed"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
withAdapters
(
m
)
=>
stats
(
"withadapters"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
readsPassingFilters
(
m
)
=>
stats
(
"passingfilters"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
tooShortR
(
m
)
=>
stats
(
"tooshort"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
tooLongR
(
m
)
=>
stats
(
"toolong"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
tooManyN
(
m
)
=>
stats
(
"toomanyn"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
basePairsProcessed
(
m
)
=>
stats
(
"bpinput"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
basePairsWritten
(
m
)
=>
stats
(
"bpoutput"
)
=
m
.
replaceAll
(
","
,
""
).
toLong
case
_
=>
}
}
statsFile
.
close
()
}
val
cleanReads
=
stats
(
"processed"
)
-
stats
(
"withadapters"
)
...
...
@@ -223,8 +272,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
"num_reads_discarded_too_long"
->
stats
(
"toolong"
),
"num_reads_discarded_many_n"
->
stats
(
"toomanyn"
),
"num_bases_input"
->
stats
(
"bpinput"
),
"num_base
d
_output"
->
stats
(
"bpoutput"
),
adaptersStatsName
->
adapterStats
.
toMap
"num_base
s
_output"
->
stats
(
"bpoutput"
),
adaptersStatsName
->
adapterStats
)
}
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala
View file @
c1913ad8
...
...
@@ -69,7 +69,7 @@ class CombineGVCFs(val root: Configurable) extends CommandLineGATK with ScatterG
override
def
cmdLine
=
super
.
cmdLine
+
repeat
(
"-A"
,
annotation
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-G"
,
group
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
.
getOrElse
(
null
)
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-V"
,
variant
,
formatPrefix
=
TaggedFile
.
formatCommandLineParameter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-o"
,
out
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
convertToBasePairResolution
,
"-bpResolution"
,
escape
=
true
,
format
=
"%s"
)
+
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala
View file @
c1913ad8
...
...
@@ -106,16 +106,12 @@ class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with Scatter
@Gather
(
enabled
=
false
)
private
var
outputIndex
:
File
=
_
@Output
@Gather
(
enabled
=
false
)
private
var
dbsnpIndex
:
File
=
_
override
def
beforeGraph
()
{
super
.
beforeGraph
()
deps
++=
variant
.
filter
(
orig
=>
orig
!=
null
&&
(!
orig
.
getName
.
endsWith
(
".list"
))).
map
(
orig
=>
VcfUtils
.
getVcfIndexFile
(
orig
))
if
(
out
!=
null
&&
!
org
.
broadinstitute
.
gatk
.
utils
.
io
.
IOUtils
.
isSpecialFile
(
out
))
outputIndex
=
VcfUtils
.
getVcfIndexFile
(
out
)
dbsnp
.
foreach
(
x
=>
d
bsnpIndex
=
VcfUtils
.
getVcfIndexFile
(
x
))
dbsnp
.
foreach
(
x
=>
d
eps
:+
=
VcfUtils
.
getVcfIndexFile
(
x
))
}
override
def
cmdLine
=
super
.
cmdLine
+
...
...
@@ -133,7 +129,7 @@ class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with Scatter
optional
(
"-ploidy"
,
sample_ploidy
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-A"
,
annotation
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-G"
,
group
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
.
getOrElse
(
null
)
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
filter_reads_with_N_cigar
,
"-filterRNC"
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
filter_mismatching_base_and_quals
,
"-filterMBQ"
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
filter_bases_not_stored
,
"-filterNoBases"
,
escape
=
true
,
format
=
"%s"
)
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala
View file @
c1913ad8
...
...
@@ -419,7 +419,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt
optional
(
"-o"
,
out
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-likelihoodEngine"
,
likelihoodCalculationEngine
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-hksr"
,
heterogeneousKmerSizeResolution
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
.
getOrElse
(
null
)
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
dontTrimActiveRegions
,
"-dontTrimActiveRegions"
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-maxDiscARExtension"
,
maxDiscARExtension
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-maxGGAARExtension"
,
maxGGAARExtension
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
@@ -444,7 +444,7 @@ class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with Scatt
repeat
(
"-inputPrior"
,
input_prior
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-ploidy"
,
sample_ploidy
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-gt_mode"
,
genotyping_mode
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-alleles"
,
alleles
),
alleles
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-alleles"
,
alleles
.
getOrElse
(
null
)
),
alleles
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-contamination"
,
contamination_fraction_to_filter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
contamination_fraction_to_filterFormat
)
+
optional
(
"-contaminationFile"
,
contamination_fraction_per_sample_file
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-pnrm"
,
p_nonref_model
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
View file @
c1913ad8
...
...
@@ -218,8 +218,8 @@ class SelectVariants(val root: Configurable) extends CommandLineGATK with Scatte
override
def
cmdLine
=
super
.
cmdLine
+
required
(
TaggedFile
.
formatCommandLineParameter
(
"-V"
,
variant
),
variant
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-disc"
,
discordance
),
discordance
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-conc"
,
concordance
),
concordance
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-disc"
,
discordance
.
getOrElse
(
null
)
),
discordance
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-conc"
,
concordance
.
getOrElse
(
null
)
),
concordance
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-o"
,
out
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-sn"
,
sample_name
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-se"
,
sample_expressions
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala
View file @
c1913ad8
...
...
@@ -276,7 +276,7 @@ class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with Scat
conditional
(
ignoreSNPAlleles
,
"-ignoreSNPAlleles"
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
allReadsSP
,
"-dl"
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
ignoreLaneInfo
,
"-ignoreLane"
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-referenceCalls"
,
reference_sample_calls
),
reference_sample_calls
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-referenceCalls"
,
reference_sample_calls
.
getOrElse
(
null
)
),
reference_sample_calls
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-refsample"
,
reference_sample_name
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-minqs"
,
min_quality_score
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-maxqs"
,
max_quality_score
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
@@ -291,14 +291,14 @@ class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with Scat
repeat
(
"-inputPrior"
,
input_prior
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-ploidy"
,
sample_ploidy
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-gt_mode"
,
genotyping_mode
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-alleles"
,
alleles
),
alleles
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-alleles"
,
alleles
.
getOrElse
(
null
)
),
alleles
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-contamination"
,
contamination_fraction_to_filter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
contamination_fraction_to_filterFormat
)
+
optional
(
"-contaminationFile"
,
contamination_fraction_per_sample_file
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-pnrm"
,
p_nonref_model
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-logExactCalls"
,
exactcallslog
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-out_mode"
,
output_mode
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
conditional
(
allSitePLs
,
"-allSitePLs"
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
.
getOrElse
(
null
)
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-comp"
,
comp
,
formatPrefix
=
TaggedFile
.
formatCommandLineParameter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-o"
,
out
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-onlyEmitSamples"
,
onlyEmitSamples
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala
View file @
c1913ad8
...
...
@@ -104,8 +104,8 @@ class VariantAnnotator(val root: Configurable) extends CommandLineGATK with Scat
override
def
cmdLine
=
super
.
cmdLine
+
required
(
TaggedFile
.
formatCommandLineParameter
(
"-V"
,
variant
),
variant
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-snpEffFile"
,
snpEffFile
),
snpEffFile
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-snpEffFile"
,
snpEffFile
.
getOrElse
()
),
snpEffFile
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
.
getOrElse
()
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-comp"
,
comp
,
formatPrefix
=
TaggedFile
.
formatCommandLineParameter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-resource"
,
resource
,
formatPrefix
=
TaggedFile
.
formatCommandLineParameter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
"-o"
,
out
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala
View file @
c1913ad8
...
...
@@ -135,8 +135,8 @@ class VariantEval(val root: Configurable) extends CommandLineGATK {
optional
(
"-o"
,
out
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-eval"
,
eval
,
formatPrefix
=
TaggedFile
.
formatCommandLineParameter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-comp"
,
comp
,
formatPrefix
=
TaggedFile
.
formatCommandLineParameter
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-gold"
,
goldStandard
),
goldStandard
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-D"
,
dbsnp
.
getOrElse
(
null
)
),
dbsnp
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
optional
(
TaggedFile
.
formatCommandLineParameter
(
"-gold"
,
goldStandard
.
getOrElse
(
null
)
),
goldStandard
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-select"
,
select_exps
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-selectName"
,
select_names
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
repeat
(
"-sn"
,
sample
,
spaceSeparated
=
true
,
escape
=
true
,
format
=
"%s"
)
+
...
...
biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala
View file @
c1913ad8
...
...
@@ -19,7 +19,7 @@ import java.io.File
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.
{
DataProvider
,
Test
}
import
org.testng.annotations.Test
/**
* Created by ahbbollen on 2-3-16.
...
...
@@ -44,34 +44,26 @@ class VcfFilterTest extends TestNGSuite with Matchers {
filterer
.
outputVcfIndex
.
getAbsolutePath
shouldBe
oVcf
.
getAbsolutePath
+
".tbi"
}
@DataProvider
(
name
=
"functions"
)
def
functions
=
{
Array
(
()
=>
testCommand
(
minSampleDepth
=
Some
(
2
)),
()
=>
testCommand
(
minTotalDepth
=
Some
(
2
)),
()
=>
testCommand
(
minAlternateDepth
=
Some
(
2
)),
()
=>
testCommand
(
minSamplesPass
=
Some
(
2
)),
()
=>
testCommand
(
minGenomeQuality
=
Some
(
50
)),
()
=>
testCommand
(
filterRefCalls
=
true
),
()
=>
testCommand
(
invertedOutputVcf
=
Some
(
File
.
createTempFile
(
"vcfFilter"
,
".vcf"
))),
()
=>
testCommand
(
resToDom
=
Some
(
"dummy"
)),
()
=>
testCommand
(
trioCompound
=
Some
(
"dummy"
)),
()
=>
testCommand
(
deNovoInSample
=
Some
(
"dummy"
)),
()
=>
testCommand
(
deNovoTrio
=
Some
(
"dummy"
)),
()
=>
testCommand
(
trioLossOfHet
=
Some
(
"dummy"
)),
()
=>
testCommand
(
mustHaveVariant
=
List
(
"sample1"
,
"sample2"
)),
()
=>
testCommand
(
calledIn
=
List
(
"sample1"
,
"sample2"
)),
()
=>
testCommand
(
mustHaveGenotype
=
List
(
"sample1:HET"
,
"sample2:HET"
)),
()
=>
testCommand
(
diffGenotype
=
List
(
"sample1:sample2"
,
"sample2:sample3"
)),
()
=>
testCommand
(
minQualScore
=
Some
(
50.0
)),
()
=>
testCommand
(
filterHetVarToHomVar
=
List
(
"dummy"
)),
()
=>
testCommand
(
id
=
List
(
"rs01"
,
"rs02"
)),
()
=>
testCommand
(
idFile
=
Some
(
File
.
createTempFile
(
"vcfFilter"
,
".txt"
)))
).
map
(
Array
(
_
))
}
@Test
(
dataProvider
=
"functions"
)
def
executer
(
function0
:
Function0
[
Unit
])
:
Unit
=
function0
()
@Test
def
testMinSampleDepth
()
=
testCommand
(
minSampleDepth
=
Some
(
2
))
@Test
def
testMinTotalDepth
()
=
testCommand
(
minTotalDepth
=
Some
(
2
))
@Test
def
testMinAlternateDepth
()
=
testCommand
(
minAlternateDepth
=
Some
(
2
))
@Test
def
testMinSamplesPass
()
=
testCommand
(
minSamplesPass
=
Some
(
2
))
@Test
def
testMinGenomeQuality
()
=
testCommand
(
minGenomeQuality
=
Some
(
50
))
@Test
def
testFilterRefCalls
()
=
testCommand
(
filterRefCalls
=
true
)
@Test
def
testInvertedOutputVcf
()
=
testCommand
(
invertedOutputVcf
=
Some
(
File
.
createTempFile
(
"vcfFilter"
,
".vcf"
)))
@Test
def
testResToDom
()
=
testCommand
(
resToDom
=
Some
(
"dummy"
))
@Test
def
testTrioCompound
()
=
testCommand
(
trioCompound
=
Some
(
"dummy"
))
@Test
def
testDeNovoInSample
()
=
testCommand
(
deNovoInSample
=
Some
(
"dummy"
))
@Test
def
testDeNovoTrio
()
=
testCommand
(
deNovoTrio
=
Some
(
"dummy"
))
@Test
def
testTrioLossOfHet
()
=
testCommand
(
trioLossOfHet
=
Some
(
"dummy"
))
@Test
def
testMustHaveVariant
()
=
testCommand
(
mustHaveVariant
=
List
(
"sample1"
,
"sample2"
))
@Test
def
testCalledIn
()
=
testCommand
(
calledIn
=
List
(
"sample1"
,
"sample2"
))
@Test
def
testMustHaveGenotype
()
=
testCommand
(
mustHaveGenotype
=
List
(
"sample1:HET"
,
"sample2:HET"
))
@Test
def
testDiffGenotype
()
=
testCommand
(
diffGenotype
=
List
(
"sample1:sample2"
,
"sample2:sample3"
))
@Test
def
testMinQualScore
()
=
testCommand
(
minQualScore
=
Some
(
50.0
))
@Test
def
testFilterHetVarToHomVar
()
=
testCommand
(
filterHetVarToHomVar
=
List
(
"dummy"
))
@Test
def
testId
()
=
testCommand
(
id
=
List
(
"rs01"
,
"rs02"
))
@Test
def
testIdFile
()
=
testCommand
(
idFile
=
Some
(
File
.
createTempFile
(
"vcfFilter"
,
".txt"
)))
protected
def
testCommand
(
minSampleDepth
:
Option
[
Int
]
=
None
,
minTotalDepth
:
Option
[
Int
]
=
None
,
...
...
biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/Summary.scala
View file @
c1913ad8
...
...
@@ -61,7 +61,7 @@ class Summary(file: File) {
}
/** Executes given function for each sample */
def
getSampleValues
(
function
:
(
Summary
,
String
)
=>
Option
[
Any
])
:
Map
[
String
,
Option
[
Any
]]
=
{
def
getSampleValues
[
T
]
(
function
:
(
Summary
,
String
)
=>
Option
[
T
])
:
Map
[
String
,
Option
[
T
]]
=
{
(
for
(
sample
<-
samples
)
yield
sample
->
function
(
this
,
sample
)).
toMap
}
...
...
@@ -95,7 +95,7 @@ class Summary(file: File) {
* @param function Function to execute
* @return (sampleId, libId) -> value
*/
def
getLibraryValues
(
function
:
(
Summary
,
String
,
String
)
=>
Option
[
Any
])
:
Map
[(
String
,
String
)
,
Option
[
Any
]]
=
{
def
getLibraryValues
[
T
]
(
function
:
(
Summary
,
String
,
String
)
=>
Option
[
T
])
:
Map
[(
String
,
String
)
,
Option
[
T
]]
=
{
(
for
(
sample
<-
samples
;
lib
<-
libraries
.
getOrElse
(
sample
,
Set
()))
yield
{
(
sample
,
lib
)
->
function
(
this
,
sample
,
lib
)
}).
toMap
...
...
biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/summary/SummaryValue.scala
View file @
c1913ad8
...
...
@@ -33,7 +33,6 @@ case class SummaryValue(value: Option[Any]) {
})
}
//TODO: Calculations are not yet used somewhere, needs more testing
def
+
(
that
:
SummaryValue
)
:
SummaryValue
=
{
(
this
.
value
,
that
.
value
)
match
{
case
(
Some
(
a
:
Double
),
Some
(
b
))
=>
SummaryValue
(
Some
(
a
+
b
.
toString
.
toDouble
))
...
...
biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/IoUtilsTest.scala
0 → 100644
View file @
c1913ad8
package
nl.lumc.sasc.biopet.utils
import
java.io.
{
File
,
FileNotFoundException
,
PrintWriter
}
import
java.nio.file.Files
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
scala.io.Source
/**
* Created by pjvanthof on 05/05/16.
*/
class
IoUtilsTest
extends
TestNGSuite
with
Matchers
{
/**
 * Writes the single line "test" to the given file, creating missing parent directories first,
 * and registers the file for deletion when the JVM exits.
 *
 * @param file destination; must have a parent directory component
 */
def createTempTestFile(file: File): Unit = {
  file.getParentFile.mkdirs()

  val out = new PrintWriter(file)
  out.println("test")
  out.close()

  file.deleteOnExit()
}
/** Copies one temp file over another existing temp file and checks the destination's content. */
@Test
def testCopyFile: Unit = {
  val temp1 = File.createTempFile("test.", ".txt")
  temp1.deleteOnExit()
  val temp2 = File.createTempFile("test.", ".txt")
  temp2.deleteOnExit()

  createTempTestFile(temp1)
  IoUtils.copyFile(temp1, temp2)

  // Close the reader even when the assertion fails, so the file handle is not leaked.
  val reader = Source.fromFile(temp2)
  try {
    reader.getLines().toList shouldBe List("test")
  } finally {
    reader.close()
  }
}
/**
 * Copying into a directory that does not exist must fail with FileNotFoundException when called
 * with two arguments, and succeed when `true` is passed as the third argument.
 */
@Test
def testCopyFileNonExistingDir: Unit = {
  val temp1 = File.createTempFile("test.", ".txt")

  // deleteOnExit removes entries in reverse registration order, so register from the outermost
  // directory down to the copied file; otherwise the directories are still non-empty at exit
  // and are left behind.
  val parentDir = Files.createTempDirectory("test").toFile
  parentDir.deleteOnExit()
  val tempDir = new File(parentDir, "non-exist")
  tempDir.deleteOnExit()
  tempDir shouldNot exist
  val temp2 = new File(tempDir, "test.txt")
  temp2.deleteOnExit()

  createTempTestFile(temp1)

  intercept[FileNotFoundException] {
    IoUtils.copyFile(temp1, temp2)
  }
  IoUtils.copyFile(temp1, temp2, true)

  // Close the reader even when the assertion fails, so the file handle is not leaked.
  val reader = Source.fromFile(temp2)
  try {
    reader.getLines().toList shouldBe List("test")
  } finally {
    reader.close()
  }
}
@Test
def
testCopyDir
:
Unit
=
{
val
tempDir1
=
Files
.
createTempDirectory
(
"test"
).
toFile
tempDir1