Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
0efd042b
Commit
0efd042b
authored
Dec 01, 2016
by
Sander Bollen
Browse files
Merge branch 'fix-BIOPET-425' into 'develop'
Fix biopet 425 Fixes BIOPET-425 and BIOPET-426 See merge request !483
parents
1337c894
643b1d89
Changes
34
Hide whitespace changes
Inline
Side-by-side
bam2wig/src/main/scala/nl/lumc/sasc/biopet/pipelines/bamtobigwig/Bam2Wig.scala
View file @
0efd042b
...
...
@@ -68,8 +68,6 @@ object Bam2Wig extends PipelineCommand {
val
bamToBigWig
=
new
Bam2Wig
(
root
)
bamToBigWig
.
outputDir
=
bamFile
.
getParentFile
bamToBigWig
.
bamFile
=
bamFile
bamToBigWig
.
init
()
bamToBigWig
.
biopetScript
()
bamToBigWig
}
}
\ No newline at end of file
bammetrics/src/test/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetricsTest.scala
View file @
0efd042b
...
...
@@ -114,6 +114,7 @@ object BamMetricsTest {
copyFile
(
"ref.fa.fai"
)
val
executables
=
Map
(
"skip_write_dependencies"
->
true
,
"refFlat"
->
"bla.refFlat"
,
"reference_fasta"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"samtools"
->
Map
(
"exe"
->
"test"
),
...
...
basty/src/test/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTest.scala
View file @
0efd042b
...
...
@@ -144,6 +144,7 @@ object BastyTest {
copyFile
(
"ref.fa.fai"
)
val
config
=
Map
(
"skip_write_dependencies"
->
true
,
"name_prefix"
->
"test"
,
"cache"
->
true
,
"dir"
->
"test"
,
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala
View file @
0efd042b
...
...
@@ -36,6 +36,8 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
var
executable
:
String
=
_
var
mainFunction
=
true
/** This is the default shell for drmaa jobs */
def
defaultRemoteCommand
=
"bash"
private
val
remoteCommand
:
String
=
config
(
"remote_command"
,
default
=
defaultRemoteCommand
)
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala
View file @
0efd042b
...
...
@@ -16,7 +16,7 @@ package nl.lumc.sasc.biopet.core
import
java.io.File
import
nl.lumc.sasc.biopet.core.summary.SummaryQScript
import
nl.lumc.sasc.biopet.core.summary.
{
SummaryQScript
,
WriteSummary
}
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import
nl.lumc.sasc.biopet.utils.Logging
...
...
@@ -63,6 +63,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
/** Returns the extension to make the report */
def
reportClass
:
Option
[
ReportBuilderExtension
]
=
None
val
skipWriteDependencies
:
Boolean
=
config
(
"skip_write_dependencies"
,
default
=
false
)
/** Script from queue itself, final to force some checks for each pipeline and write report */
final
def
script
()
{
outputDir
=
config
(
"output_dir"
)
...
...
@@ -85,7 +87,9 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
this
match
{
case
q
:
MultiSampleQScript
if
q.onlySamples.nonEmpty
&&
!q.samples.forall
(
x
=>
q.onlySamples.contains
(
x.
_
1
))
=>
logger
.
info
(
"Write report is skipped because sample flag is used"
)
case
_
=>
reportClass
.
foreach
(
add
(
_
))
case
_
=>
reportClass
.
foreach
{
report
=>
add
(
report
)
}
}
logger
.
info
(
"Running pre commands"
)
...
...
@@ -95,7 +99,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
f
.
beforeGraph
()
f
.
internalBeforeGraph
()
f
.
commandLine
case
_
=>
case
f
:
WriteSummary
=>
f
.
init
()
case
_
=>
}
if
(
outputDir
.
getParentFile
.
canWrite
||
(
outputDir
.
exists
&&
outputDir
.
canWrite
))
...
...
@@ -118,7 +123,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
}
})
if
(
logger
.
isDebugEnabled
)
WriteDependencies
.
writeDependencies
(
functions
,
new
File
(
outputDir
,
s
".log
/${
qSettings.runName
}.deps.json"
)
)
if
(
!
skipWriteDependencies
)
WriteDependencies
.
writeDependencies
(
functions
,
new
File
(
outputDir
,
".log
"
),
qSettings
.
runName
)
Logging
.
checkErrors
()
logger
.
info
(
"Script complete without errors"
)
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala
View file @
0efd042b
...
...
@@ -16,9 +16,11 @@ package nl.lumc.sasc.biopet.core
import
java.io.
{
File
,
PrintWriter
}
import
nl.lumc.sasc.biopet.core.summary.WriteSummary
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
nl.lumc.sasc.biopet.utils.
{
Logging
,
ConfigUtils
}
import
nl.lumc.sasc.biopet.utils.
{
ConfigUtils
,
Logging
}
import
org.broadinstitute.gatk.queue.function.
{
CommandLineFunction
,
QFunction
}
import
scala.collection.mutable
import
scala.collection.mutable.ListBuffer
...
...
@@ -37,7 +39,7 @@ object WriteDependencies extends Logging with Configurable {
case
f
=>
f
.
getClass
.
getSimpleName
}
cache
+=
baseName
->
(
cache
.
getOrElse
(
baseName
,
0
)
+
1
)
function
->
s
"$baseName
-
${cache(baseName)}"
function
->
s
"$
{
baseName
.replaceAll("
-
", "
_
")}_
${cache(baseName)}"
}).
toMap
}
...
...
@@ -45,9 +47,10 @@ object WriteDependencies extends Logging with Configurable {
* This method will generate a json file where information about job and file dependencies are stored
*
* @param functions This should be all functions that are given to the graph of Queue
* @param outputFile Json file to write dependencies to
* @param outputDir
* @param prefix prefix
*/
def
writeDependencies
(
functions
:
Seq
[
QFunction
],
output
File
:
File
)
:
Unit
=
{
def
writeDependencies
(
functions
:
Seq
[
QFunction
],
output
Dir
:
File
,
prefix
:
String
)
:
Unit
=
{
logger
.
info
(
"Start calculating dependencies"
)
val
errorOnMissingInput
:
Boolean
=
config
(
"error_on_missing_input"
,
false
)
...
...
@@ -107,27 +110,89 @@ object WriteDependencies extends Logging with Configurable {
val
jobs
=
functionNames
.
par
.
map
{
case
(
f
,
name
)
=>
name
->
Map
(
"command"
->
(
f
match
{
name
.
toString
->
Map
(
"command"
->
(
f
match
{
case
cmd
:
CommandLineFunction
=>
cmd
.
commandLine
case
_
=>
None
}),
"main_job"
->
(
f
match
{
case
cmd
:
BiopetCommandLineFunction
=>
cmd
.
mainFunction
case
s
:
WriteSummary
if
s.qscript.root
=
=
null
=>
true
case
_
=>
false
}),
"intermediate"
->
f
.
isIntermediate
,
"depends_on_intermediate"
->
f
.
inputs
.
exists
(
files
(
_
).
isIntermediate
),
"depends_on_jobs"
->
f
.
inputs
.
toList
.
flatMap
(
files
(
_
).
outputJobNames
).
distinct
,
"output_used_by_jobs"
->
outputFiles
(
f
).
toList
.
flatMap
(
files
(
_
).
inputJobNames
).
distinct
,
"outputs"
->
outputFiles
(
f
).
toList
,
"inputs"
->
f
.
inputs
.
toList
,
"done_files"
->
f
.
doneOutputs
.
toList
,
"fail_files"
->
f
.
failOutputs
.
toList
,
"stdout_file"
->
f
.
jobOutputFile
,
"done_at_start"
->
f
.
isDone
,
"fail_at_start"
->
f
.
isFail
)
}.
toIterator
.
toMap
val
outputFile
=
new
File
(
outputDir
,
s
"$prefix.deps.json"
)
logger
.
info
(
s
"Writing dependencies to: $outputFile"
)
val
writer
=
new
PrintWriter
(
outputFile
)
writer
.
println
(
ConfigUtils
.
mapToJson
(
Map
(
"jobs"
->
jobs
.
toMap
,
"jobs"
->
jobs
,
"files"
->
files
.
values
.
par
.
map
(
_
.
getMap
).
toList
)).
spaces2
)
writer
.
close
()
val
jobsDeps
=
jobs
.
map
(
x
=>
x
.
_1
->
(
x
.
_2
(
"depends_on_jobs"
)
match
{
case
l
:
List
[
_
]
=>
l
.
map
(
_
.
toString
)
case
_
=>
throw
new
IllegalStateException
(
"Value 'depends_on_jobs' is not a list"
)
}))
val
jobsWriter
=
new
PrintWriter
(
new
File
(
outputDir
,
s
"$prefix.jobs.json"
))
jobsWriter
.
println
(
ConfigUtils
.
mapToJson
(
jobsDeps
).
spaces2
)
jobsWriter
.
close
()
writeGraphvizFile
(
jobsDeps
,
new
File
(
outputDir
,
s
"$prefix.jobs.gv"
))
writeGraphvizFile
(
compressOnType
(
jobsDeps
),
new
File
(
outputDir
,
s
"$prefix.compress.jobs.gv"
))
val
mainJobs
=
jobs
.
filter
(
_
.
_2
(
"main_job"
)
==
true
).
map
{
case
(
name
,
job
)
=>
name
->
getMainDependencies
(
name
,
jobs
)
}
val
mainJobsWriter
=
new
PrintWriter
(
new
File
(
outputDir
,
s
"$prefix.main_jobs.json"
))
mainJobsWriter
.
println
(
ConfigUtils
.
mapToJson
(
mainJobs
).
spaces2
)
mainJobsWriter
.
close
()
writeGraphvizFile
(
mainJobs
,
new
File
(
outputDir
,
s
"$prefix.main_jobs.gv"
))
writeGraphvizFile
(
compressOnType
(
mainJobs
),
new
File
(
outputDir
,
s
"$prefix.compress.main_jobs.gv"
))
logger
.
info
(
"done calculating dependencies"
)
}
def
getMainDependencies
(
jobName
:
String
,
jobsMap
:
Map
[
String
,
Map
[
String
,
Any
]])
:
List
[
String
]
=
{
val
job
=
jobsMap
(
jobName
)
val
dependencies
=
job
(
"depends_on_jobs"
)
match
{
case
l
:
List
[
_
]
=>
l
.
map
(
_
.
toString
)
}
dependencies
.
flatMap
{
dep
=>
jobsMap
(
dep
)(
"main_job"
)
match
{
case
true
=>
List
(
dep
)
case
false
=>
getMainDependencies
(
dep
,
jobsMap
)
}
}.
distinct
}
val
numberRegex
=
"""(.*)_(\d*)$"""
.
r
def
compressOnType
(
jobs
:
Map
[
String
,
List
[
String
]])
:
Map
[
String
,
List
[
String
]]
=
{
val
set
=
for
((
job
,
deps
)
<-
jobs
.
toSet
;
dep
<-
deps
)
yield
{
job
match
{
case
numberRegex
(
name
,
number
)
=>
(
name
,
dep
match
{
case
numberRegex
(
name
,
number
)
=>
name
})
}
}
set
.
groupBy
(
_
.
_1
).
map
(
x
=>
x
.
_1
->
x
.
_2
.
map
(
_
.
_2
).
toList
)
}
def
writeGraphvizFile
(
jobs
:
Map
[
String
,
List
[
String
]],
outputFile
:
File
)
:
Unit
=
{
val
writer
=
new
PrintWriter
(
outputFile
)
writer
.
println
(
"digraph graphname {"
)
jobs
.
foreach
{
case
(
a
,
b
)
=>
b
.
foreach
(
c
=>
writer
.
println
(
s
" $c -> $a;"
))
}
writer
.
println
(
"}"
)
writer
.
close
()
}
}
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/Md5sum.scala
View file @
0efd042b
...
...
@@ -22,6 +22,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for md5sum */
class
Md5sum
(
val
root
:
Configurable
)
extends
BiopetCommandLineFunction
with
Version
{
mainFunction
=
false
@Input
(
doc
=
"Input"
)
var
input
:
File
=
_
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala
View file @
0efd042b
...
...
@@ -49,15 +49,23 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
//TODO: add more checksums types
override
def
freezeFieldValues
()
:
Unit
=
{
for
(
q
<-
qscript
.
summaryQScripts
)
deps
:+=
q
.
summaryFile
init
()
super
.
freezeFieldValues
()
}
def
init
()
:
Unit
=
{
for
(
q
<-
qscript
.
summaryQScripts
)
deps
:+=
q
.
summaryFile
for
((
_
,
l
)
<-
qscript
.
summarizables
;
s
<-
l
)
s
match
{
case
f
:
QFunction
=>
deps
:+=
f
.
firstOutput
case
f
:
QFunction
=>
try
{
deps
:+=
f
.
firstOutput
}
catch
{
case
e
:
NullPointerException
=>
logger
.
warn
(
"Queue values are not init"
)
}
case
_
=>
}
jobOutputFile
=
new
File
(
out
.
getParentFile
,
".%s.%s.out"
.
format
(
out
.
getName
,
analysisName
))
super
.
freezeFieldValues
()
}
/** Function to create summary */
...
...
biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/WriteDependenciesTest.scala
View file @
0efd042b
...
...
@@ -15,16 +15,14 @@
package
nl.lumc.sasc.biopet.core
import
java.io.File
import
java.nio.file.Files
import
com.google.common.io.Files
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
org.broadinstitute.gatk.queue.function.QFunction
import
org.scalatest.Matchers
import
org.scalatest.testng.TestNGSuite
import
org.testng.annotations.Test
import
scala.io.Source
/**
* Created by pjvanthof on 09/05/16.
*/
...
...
@@ -42,11 +40,14 @@ class WriteDependenciesTest extends TestNGSuite with Matchers {
@Test
def
testDeps
:
Unit
=
{
val
outputFile
=
File
.
createTempFile
(
"deps."
,
".json"
)
val
tempDir
=
Files
.
createTempDir
()
tempDir
.
deleteOnExit
()
val
prefix
=
"test"
val
outputFile
=
new
File
(
tempDir
,
s
"$prefix.deps.json"
)
outputFile
.
deleteOnExit
()
val
func1
=
Qfunc
(
file1
::
Nil
,
file2
::
Nil
)
val
func2
=
Qfunc
(
file2
::
Nil
,
file3
::
Nil
)
WriteDependencies
.
writeDependencies
(
func1
::
func2
::
Nil
,
outputFile
)
WriteDependencies
.
writeDependencies
(
func1
::
func2
::
Nil
,
tempDir
,
prefix
)
val
deps
=
ConfigUtils
.
fileToConfigMap
(
outputFile
)
deps
(
"jobs"
)
shouldBe
a
[
Map
[
_
,
_
]]
val
jobs
=
deps
(
"jobs"
).
asInstanceOf
[
Map
[
String
,
Map
[
String
,
Any
]]]
...
...
@@ -66,7 +67,7 @@ class WriteDependenciesTest extends TestNGSuite with Matchers {
}
object
WriteDependenciesTest
{
val
tempDir
=
Files
.
createTempDir
ectory
(
"test"
).
toFile
val
tempDir
=
Files
.
createTempDir
()
tempDir
.
deleteOnExit
()
val
file1
=
new
File
(
tempDir
,
"file1.txt"
)
val
file2
=
new
File
(
tempDir
,
"file2.txt"
)
...
...
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/SeqStat.scala
View file @
0efd042b
...
...
@@ -30,6 +30,8 @@ import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
class
SeqStat
(
val
root
:
Configurable
)
extends
ToolCommandFunction
with
Summarizable
{
def
toolObject
=
nl
.
lumc
.
sasc
.
biopet
.
tools
.
SeqStat
mainFunction
=
false
@Input
(
doc
=
"Input FASTQ"
,
shortName
=
"input"
,
required
=
true
)
var
input
:
File
=
null
...
...
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala
View file @
0efd042b
...
...
@@ -32,6 +32,8 @@ import scala.io.Source
class
VcfStats
(
val
root
:
Configurable
)
extends
ToolCommandFunction
with
Summarizable
with
Reference
{
def
toolObject
=
nl
.
lumc
.
sasc
.
biopet
.
tools
.
VcfStats
mainFunction
=
false
@Input
(
doc
=
"Input fastq"
,
shortName
=
"I"
,
required
=
true
)
var
input
:
File
=
_
...
...
carp/src/test/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpTest.scala
View file @
0efd042b
...
...
@@ -117,6 +117,7 @@ object CarpTest {
copyFile
(
"ref.fa.fai"
)
val
executables
=
Map
(
"skip_write_dependencies"
->
true
,
"reference_fasta"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"fastqc"
->
Map
(
"exe"
->
"test"
),
"seqtk"
->
Map
(
"exe"
->
"test"
),
...
...
flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepTest.scala
View file @
0efd042b
...
...
@@ -115,6 +115,7 @@ object FlexiprepTest {
Files
.
touch
(
r2Zipped
)
val
executables
=
Map
(
"skip_write_dependencies"
->
true
,
"seqstat"
->
Map
(
"exe"
->
"test"
),
"fastqc"
->
Map
(
"exe"
->
"test"
),
"seqtk"
->
Map
(
"exe"
->
"test"
),
...
...
gears/src/main/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsCentrifuge.scala
View file @
0efd042b
...
...
@@ -51,7 +51,8 @@ class GearsCentrifuge(val root: Configurable) extends QScript with SummaryQScrip
centrifugeKreport
.
centrifugeOutputFiles
:+=
fifo
centrifugeKreport
.
output
=
new
File
(
outputDir
,
s
"$outputName.$name.kreport"
)
centrifugeKreport
.
onlyUnique
=
unique
add
(
new
BiopetFifoPipe
(
this
,
List
(
centrifugeKreport
,
Zcat
(
this
,
centrifugeOutput
,
fifo
))))
val
pipe
=
new
BiopetFifoPipe
(
this
,
List
(
centrifugeKreport
,
Zcat
(
this
,
centrifugeOutput
,
fifo
)))
add
(
pipe
)
val
krakenReportJSON
=
new
KrakenReportToJson
(
this
)
krakenReportJSON
.
inputReport
=
centrifugeKreport
.
output
...
...
gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsSingleTest.scala
View file @
0efd042b
...
...
@@ -234,6 +234,7 @@ object TestGearsSingle {
bam
.
deleteOnExit
()
val
executables
=
Map
(
"skip_write_dependencies"
->
true
,
"kraken"
->
Map
(
"exe"
->
"test"
,
"db"
->
"test"
),
"centrifuge"
->
Map
(
"exe"
->
"test"
,
"centrifuge_index"
->
"test"
),
"centrifugekreport"
->
Map
(
"exe"
->
"test"
),
...
...
gears/src/test/scala/nl/lumc/sasc/biopet/pipelines/gears/GearsTest.scala
View file @
0efd042b
...
...
@@ -113,6 +113,7 @@ object GearsTest {
Files
.
touch
(
bam
)
val
config
=
Map
(
"skip_write_dependencies"
->
true
,
"output_dir"
->
outputDir
,
"kraken"
->
Map
(
"exe"
->
"test"
,
"db"
->
"test"
),
"krakenreport"
->
Map
(
"exe"
->
"test"
,
"db"
->
"test"
),
...
...
generate-indexes/src/test/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomesTest.scala
View file @
0efd042b
...
...
@@ -141,7 +141,9 @@ object DownloadGenomesTest {
val
outputDir
=
Files
.
createTempDir
()
outputDir
.
deleteOnExit
()
val
config
=
Map
(
"output_dir"
->
outputDir
,
val
config
=
Map
(
"skip_write_dependencies"
->
true
,
"output_dir"
->
outputDir
,
"bwa"
->
Map
(
"exe"
->
"test"
),
"star"
->
Map
(
"exe"
->
"test"
),
"hisat2build"
->
Map
(
"exe"
->
"test"
),
...
...
gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala
View file @
0efd042b
...
...
@@ -186,6 +186,7 @@ object GentrapTest {
copyFile
(
"ref.fa.fai"
)
val
executables
:
Map
[
String
,
Any
]
=
Map
(
"skip_write_dependencies"
->
true
,
"reference_fasta"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"refFlat"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"annotation_gtf"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
...
...
gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTestTest.scala
View file @
0efd042b
...
...
@@ -75,6 +75,7 @@ object GwasTestTest {
}
val
config
=
Map
(
"skip_write_dependencies"
->
true
,
"reference_fasta"
->
reference
.
toString
,
"phenotype_file"
->
phenotypeFile
.
toString
,
"output_dir"
->
outputDir
,
...
...
gwas-test/src/test/scala/nl/lumc/sasc/biopet/pipelines/gwastest/impute/Impute2VcfTest.scala
View file @
0efd042b
...
...
@@ -93,6 +93,7 @@ object Impute2VcfTest {
}
val
config
=
Map
(
"skip_write_dependencies"
->
true
,
"reference_fasta"
->
reference
.
toString
,
"phenotype_file"
->
phenotypeFile
.
toString
,
"output_dir"
->
outputDir
,
...
...
Prev
1
2
Next
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment