Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
3fa85cef
Commit
3fa85cef
authored
Jul 07, 2017
by
Sander Bollen
Committed by
GitHub
Jul 07, 2017
Browse files
Merge branch 'develop' into feature-tarmac-pipeline
parents
cb03dd7b
c99aa979
Changes
136
Hide whitespace changes
Inline
Side-by-side
Jenkinsfile
View file @
3fa85cef
...
...
@@ -21,10 +21,6 @@ node('local') {
}
}
stage
(
'Report Tests'
)
{
junit
'*/target/surefire-reports/*.xml'
}
stage
(
'Check git on changes'
)
{
sh
'if [ $(git diff | wc -l) -eq 0 ]; then true; else echo "[ERROR] Git is not clean anymore after build"; git diff; echo "[ERROR] This might be caused by reformated code, if so run maven locally"; false; fi'
}
...
...
bammetrics/pom.xml
View file @
3fa85cef
...
...
@@ -39,7 +39,7 @@
</dependency>
<dependency>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
Biopet
Tools
Extensions
</artifactId>
<artifactId>
BiopetExtensions
</artifactId>
<version>
${project.version}
</version>
</dependency>
</dependencies>
...
...
bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
View file @
3fa85cef
...
...
@@ -42,6 +42,9 @@ class BamMetrics(val parent: Configurable)
@Input
(
doc
=
"Bam File"
,
shortName
=
"BAM"
,
required
=
true
)
var
inputBam
:
File
=
_
@Argument
(
required
=
false
)
var
paired
:
Boolean
=
true
override
def
defaults
=
Map
(
"bedtoolscoverage"
->
Map
(
"sorted"
->
true
))
/** returns files to store in summary */
...
...
@@ -87,6 +90,9 @@ class BamMetrics(val parent: Configurable)
val
multiMetrics
=
new
CollectMultipleMetrics
(
this
)
multiMetrics
.
input
=
inputBam
multiMetrics
.
outputName
=
new
File
(
outputDir
,
inputBam
.
getName
.
stripSuffix
(
".bam"
))
if
(!
paired
)
multiMetrics
.
program
=
multiMetrics
.
program
.
filter
(
_
!=
CollectMultipleMetrics
.
Programs
.
CollectInsertSizeMetrics
)
add
(
multiMetrics
)
addSummarizable
(
multiMetrics
,
"multi_metrics"
)
...
...
@@ -212,6 +218,7 @@ object BamMetrics extends PipelineCommand {
def
apply
(
root
:
Configurable
,
bamFile
:
File
,
outputDir
:
File
,
paired
:
Boolean
,
sampleId
:
Option
[
String
]
=
None
,
libId
:
Option
[
String
]
=
None
)
:
BamMetrics
=
{
val
bamMetrics
=
new
BamMetrics
(
root
)
...
...
@@ -219,6 +226,7 @@ object BamMetrics extends PipelineCommand {
bamMetrics
.
libId
=
libId
bamMetrics
.
inputBam
=
bamFile
bamMetrics
.
outputDir
=
outputDir
bamMetrics
.
paired
=
paired
bamMetrics
.
init
()
bamMetrics
.
biopetScript
()
...
...
bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/scripts/CoverageStats.scala
View file @
3fa85cef
...
...
@@ -39,7 +39,7 @@ class CoverageStats(val parent: Configurable) extends PythonCommandLineFunction
override
def
defaultCoreMemory
=
9.0
def
cmdLine
=
def
cmdLine
:
String
=
getPythonCommand
+
(
if
(
inputAsStdin
)
" - "
else
required
(
input
))
+
required
(
"--plot"
,
plot
)
+
...
...
biopet-core/pom.xml
View file @
3fa85cef
...
...
@@ -35,6 +35,11 @@
<artifactId>
BiopetUtils
</artifactId>
<version>
${project.version}
</version>
</dependency>
<dependency>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
BiopetTools
</artifactId>
<version>
${project.version}
</version>
</dependency>
<dependency>
<groupId>
org.broadinstitute.gatk
</groupId>
<artifactId>
gatk-queue
</artifactId>
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala
View file @
3fa85cef
...
...
@@ -44,7 +44,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
val
preCommands
:
List
[
String
]
=
config
(
"pre_commands"
,
default
=
Nil
,
freeVar
=
false
)
pr
ivate
def
changeScript
(
file
:
File
)
:
Unit
=
{
pr
otected
def
changeScript
(
file
:
File
)
:
Unit
=
{
val
lines
=
Source
.
fromFile
(
file
).
getLines
().
toList
val
writer
=
new
PrintWriter
(
file
)
remoteCommand
match
{
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetFifoPipe.scala
View file @
3fa85cef
...
...
@@ -14,11 +14,14 @@
*/
package
nl.lumc.sasc.biopet.core
import
java.io.File
import
java.io.
{
File
,
PrintWriter
}
import
nl.lumc.sasc.biopet.utils.Logging
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.Output
import
scala.io.Source
/**
* Created by pjvan_thof on 9/29/15.
*/
...
...
@@ -30,13 +33,13 @@ class BiopetFifoPipe(val parent: Configurable,
val
outputs
:
Map
[
BiopetCommandLineFunction
,
Seq
[
File
]]
=
try
{
commands
.
map
(
x
=>
x
->
x
.
outputs
).
toMap
}
catch
{
case
e
:
NullPointerException
=>
Map
()
case
_
:
NullPointerException
=>
Map
()
}
val
inputs
:
Map
[
BiopetCommandLineFunction
,
Seq
[
File
]]
=
try
{
commands
.
map
(
x
=>
x
->
x
.
inputs
).
toMap
}
catch
{
case
e
:
NullPointerException
=>
Map
()
case
_
:
NullPointerException
=>
Map
()
}
for
(
cmdOutput
<-
commands
;
...
...
@@ -52,13 +55,13 @@ class BiopetFifoPipe(val parent: Configurable,
val
outputs
:
Map
[
BiopetCommandLineFunction
,
Seq
[
File
]]
=
try
{
commands
.
map
(
x
=>
x
->
x
.
outputs
).
toMap
}
catch
{
case
e
:
NullPointerException
=>
Map
()
case
_
:
NullPointerException
=>
Map
()
}
val
inputs
:
Map
[
BiopetCommandLineFunction
,
Seq
[
File
]]
=
try
{
commands
.
map
(
x
=>
x
->
x
.
inputs
).
toMap
}
catch
{
case
e
:
NullPointerException
=>
Map
()
case
_
:
NullPointerException
=>
Map
()
}
val
fifoFiles
=
fifos
...
...
@@ -83,14 +86,16 @@ class BiopetFifoPipe(val parent: Configurable,
}
}
def
cmdLine
=
{
val
fifosFiles
=
this
.
fifos
fifosFiles
.
filter
(
_
.
exists
()).
map
(
required
(
"rm"
,
_
)).
mkString
(
"\n\n"
,
" \n"
,
" \n\n"
)
+
fifosFiles
.
map
(
required
(
"mkfifo"
,
_
)).
mkString
(
"\n\n"
,
"\n"
,
"\n\n"
)
+
commands
.
map
(
_
.
commandLine
).
mkString
(
"\n\n"
,
" & \n"
,
" & \n\n"
)
+
BiopetFifoPipe
.
waitScript
+
fifosFiles
.
map
(
required
(
"rm"
,
_
)).
mkString
(
"\n\n"
,
" \n"
,
" \n\n"
)
+
BiopetFifoPipe
.
endScript
/**
 * Builds the shell fragment that prepares the fifos and starts all commands.
 *
 * The result is the concatenation of three parts, in this order:
 *  - an `rm` line for every fifo that currently exists,
 *  - a `mkfifo` line for every fifo,
 *  - every command line, each one started in the background with `&`.
 */
def cmdLine: String = {
  val removeExistingFifos =
    this.fifos.filter(_.exists()).map(required("rm", _)).mkString("", "\n", "\n")
  val createFifos =
    this.fifos.map(required("mkfifo", _)).mkString("\n")
  val backgroundCommands =
    commands.map(_.commandLine).mkString("\n", " & \n", " & \n")

  removeExistingFifos + createFifos + backgroundCommands
}
/**
 * Adds the fifo control code to the generated job script.
 *
 * The parent implementation is applied first; afterwards the `BiopetFifoPipe`
 * object rewrites the same file for the fifos of this pipe.
 *
 * @param file script file that is rewritten
 */
override protected def changeScript(file: File): Unit = {
  super.changeScript(file)
  BiopetFifoPipe.changeScript(file, this.fifos)
}
override
def
setResources
()
:
Unit
=
{
...
...
@@ -110,7 +115,27 @@ class BiopetFifoPipe(val parent: Configurable,
}
object
BiopetFifoPipe
{
val
waitScript
=
/**
 * Appends the control code for fifo pipes to an already generated script.
 *
 * The current lines of `file` are read into memory and written back unchanged,
 * followed by [[waitScript]], one `rm` line per fifo and finally [[endScript]].
 * When debug logging is enabled the resulting script content is logged.
 *
 * All readers and the writer are closed in `finally` blocks, so a failure while
 * reading or writing does not leak a file handle.
 *
 * @param file  script file that is rewritten in place
 * @param fifos fifo files for which an `rm` line is appended
 */
def changeScript(file: File, fifos: List[File]): Unit = {
  // The content must be fully read before the writer is created:
  // java.io.PrintWriter(File) truncates an existing file when it is opened.
  val reader = Source.fromFile(file)
  val lines = try reader.getLines().toList
  finally reader.close()

  val writer = new PrintWriter(file)
  try {
    lines.foreach(writer.println)
    writer.println(BiopetFifoPipe.waitScript)
    writer.println(fifos.map("rm " + _).mkString(" \n"))
    writer.println(BiopetFifoPipe.endScript)
  } finally writer.close()

  if (Logging.logger.isDebugEnabled) {
    val debugReader = Source.fromFile(file)
    try Logging.logger.debug(
      s"Content of script $file:\n" + debugReader.getLines().mkString("\n"))
    finally debugReader.close()
  }
}
val
waitScript
:
String
=
"""
|
|allJobs=`jobs -p`
...
...
@@ -156,7 +181,7 @@ object BiopetFifoPipe {
|
"""
.
stripMargin
val
endScript
=
val
endScript
:
String
=
"""
|
|if [ "$FAIL" == "0" ];
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala
View file @
3fa85cef
...
...
@@ -127,7 +127,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
logger
.
info
(
"Checking input files"
)
inputFiles
.
par
.
foreach
{
i
=>
if
(!
i
.
file
.
exists
())
Logging
.
addError
(
s
"Input file does not exist: ${i.file}"
)
if
(!
i
.
file
.
canRead
)
Logging
.
addError
(
s
"Input file can not be read: ${i.file}"
)
else
if
(!
i
.
file
.
canRead
)
Logging
.
addError
(
s
"Input file can not be read: ${i.file}"
)
if
(!
i
.
file
.
isAbsolute
)
Logging
.
addError
(
s
"Input file should be an absolute path: ${i.file}"
)
}
...
...
shiva
/src/main/scala/nl/lumc/sasc/biopet/
pipelines/shiva
/CheckValidateVcf.scala
→
biopet-core
/src/main/scala/nl/lumc/sasc/biopet/
core/extensions
/CheckValidateVcf.scala
View file @
3fa85cef
...
...
@@ -12,10 +12,11 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.
pipelines.shiva
package
nl.lumc.sasc.biopet.
core.extensions
import
java.io.File
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.function.InProcessFunction
import
org.broadinstitute.gatk.utils.commandline.Input
...
...
@@ -26,20 +27,32 @@ import scala.io.Source
*
* Created by pjvanthof on 16/08/15.
*/
class
CheckValidateVcf
extends
InProcessFunction
{
class
CheckValidateVcf
(
val
parent
:
Configurable
)
extends
InProcessFunction
with
Configurable
{
@Input
(
required
=
true
)
var
inputLogFile
:
File
=
_
val
abortOnError
:
Boolean
=
config
(
"abort_on_error"
,
default
=
true
)
var
species
:
String
=
""
var
genomeName
:
String
=
""
/** Scans the input log file; on a line starting with "ERROR" it halts the JVM with exit code 130 when abortOnError is set, otherwise it logs a warning */
def
run
:
Unit
=
{
def
run
()
:
Unit
=
{
val
reader
=
Source
.
fromFile
(
inputLogFile
)
reader
.
getLines
().
foreach
{
line
=>
if
(
line
.
startsWith
(
"ERROR"
))
{
logger
.
error
(
"Corrupt vcf file found, aborting pipeline"
)
if
(
abortOnError
)
{
logger
.
error
(
"Corrupt vcf file found, aborting pipeline"
)
// 130 Simulates a ctr-C
Runtime
.
getRuntime
.
halt
(
130
)
// 130 Simulates a ctr-C
Runtime
.
getRuntime
.
halt
(
130
)
}
else
{
logger
.
warn
(
s
"Corrupt vcf file found for $species-$genomeName, for details see $inputLogFile"
)
}
}
}
reader
.
close
()
...
...
biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/PythonCommandLineFunction.scala
View file @
3fa85cef
...
...
@@ -19,6 +19,7 @@ import java.io.{File, FileOutputStream}
import
nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import
nl.lumc.sasc.biopet.utils.Logging
import
org.broadinstitute.gatk.utils.commandline.Input
import
scala.collection.mutable
trait
PythonCommandLineFunction
extends
BiopetCommandLineFunction
{
@Input
(
doc
=
"Python script"
,
required
=
false
)
...
...
@@ -34,10 +35,13 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
*/
def
setPythonScript
(
script
:
String
)
{
pythonScript
=
new
File
(
script
).
getAbsoluteFile
if
(!
p
ython
Script
.
exists
(
))
{
if
(!
P
ython
CommandLineFunction
.
alreadyCopied
.
contains
((
this
.
getClass
,
script
)
))
{
setPythonScript
(
script
,
""
)
this
.
getClass
PythonCommandLineFunction
.
alreadyCopied
+=
(
this
.
getClass
,
script
)
->
pythonScript
}
else
{
pythonScriptName
=
script
pythonScript
=
PythonCommandLineFunction
.
alreadyCopied
((
this
.
getClass
,
script
))
}
}
...
...
@@ -48,14 +52,18 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
*/
def
setPythonScript
(
script
:
String
,
subpackage
:
String
)
{
pythonScriptName
=
script
pythonScript
=
new
File
(
".queue/tmp/"
+
subpackage
+
pythonScriptName
).
getAbsoluteFile
if
(!
pythonScript
.
getParentFile
.
exists
)
pythonScript
.
getParentFile
.
mkdirs
val
is
=
getClass
.
getResourceAsStream
(
subpackage
+
pythonScriptName
)
if
(
is
!=
null
)
{
val
os
=
new
FileOutputStream
(
pythonScript
)
org
.
apache
.
commons
.
io
.
IOUtils
.
copy
(
is
,
os
)
os
.
close
()
}
else
Logging
.
addError
(
s
"Python script not found: $pythonScriptName"
)
if
(
new
File
(
script
).
isAbsolute
&&
new
File
(
script
).
exists
())
{
pythonScript
=
new
File
(
script
)
}
else
{
pythonScript
=
new
File
(
".queue/tmp/"
+
subpackage
+
pythonScriptName
).
getAbsoluteFile
if
(!
pythonScript
.
getParentFile
.
exists
)
pythonScript
.
getParentFile
.
mkdirs
val
is
=
getClass
.
getResourceAsStream
(
subpackage
+
pythonScriptName
)
if
(
is
!=
null
)
{
val
os
=
new
FileOutputStream
(
pythonScript
)
org
.
apache
.
commons
.
io
.
IOUtils
.
copy
(
is
,
os
)
os
.
close
()
}
else
Logging
.
addError
(
s
"Python script not found: $pythonScriptName"
)
}
}
/** return basic command to prefix the complete command with */
...
...
@@ -63,3 +71,7 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
required
(
executable
)
+
required
(
pythonScript
)
}
}
object PythonCommandLineFunction {

  /**
   * Script files that were already stored, keyed on the pair
   * (class of the function, script name).
   */
  private val alreadyCopied: mutable.Map[(Class[_], String), File] =
    mutable.Map.empty[(Class[_], String), File]
}
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BamStats.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BamStats.scala
View file @
3fa85cef
File moved
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BaseCounter.scala
View file @
3fa85cef
File moved
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BastyGenerateFasta.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BastyGenerateFasta.scala
View file @
3fa85cef
File moved
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedToInterval.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedToInterval.scala
View file @
3fa85cef
File moved
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedtoolsCoverageToCounts.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BedtoolsCoverageToCounts.scala
View file @
3fa85cef
File moved
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BiopetFlagstat.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/BiopetFlagstat.scala
View file @
3fa85cef
File moved
biopet-core/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/CheckValidateAnnotation.scala
0 → 100644
View file @
3fa85cef
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package
nl.lumc.sasc.biopet.extensions.tools
import
java.io.File
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.queue.function.InProcessFunction
import
org.broadinstitute.gatk.utils.commandline.Input
import
scala.io.Source
/**
* This class checks results of [[nl.lumc.sasc.biopet.tools.ValidateVcf]] and aborts the pipeline when an error has been found
*
* Created by pjvanthof on 16/08/15.
*/
class CheckValidateAnnotation(val parent: Configurable)
    extends InProcessFunction
    with Configurable {

  /** Log file that is scanned for lines starting with "ERROR" */
  @Input(required = true)
  var inputLogFile: File = _

  /** Read from config key "abort_on_error" (default true); decides between halting and warning */
  val abortOnError: Boolean = config("abort_on_error", default = true)

  /** Species name, used in the warning message of [[run]] */
  var species: String = ""

  /** Genome name, used in the warning message of [[run]] */
  var genomeName: String = ""

  /**
   * Scans [[inputLogFile]] line by line for lines starting with "ERROR".
   *
   * For such a line the behaviour depends on [[abortOnError]]:
   *  - true: an error is logged and the JVM is halted with exit code 130;
   *  - false: two warnings are logged and scanning continues.
   *
   * The reader is closed in a `finally` block so it is released when reading fails.
   */
  def run(): Unit = {
    val reader = Source.fromFile(inputLogFile)
    try {
      reader.getLines().foreach { line =>
        if (line.startsWith("ERROR")) {
          if (abortOnError) {
            logger.error("Corrupt annotations files found, aborting pipeline")
            // Exit code 130 simulates a Ctrl-C; halt() terminates the JVM immediately
            Runtime.getRuntime.halt(130)
          } else {
            logger.warn(s"Corrupt annotations files found for $species-$genomeName")
            logger.warn(
              "**** You enabled a unsafe method by letting the pipeline continue with incorrect annotations files ****")
          }
        }
      }
    } finally reader.close()
  }
}
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/DownloadNcbiAssembly.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/DownloadNcbiAssembly.scala
View file @
3fa85cef
...
...
@@ -20,9 +20,6 @@ import nl.lumc.sasc.biopet.core.ToolCommandFunction
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
/**
* @deprecated Use picard.util.BedToIntervalList instead
*/
class
DownloadNcbiAssembly
(
val
parent
:
Configurable
)
extends
ToolCommandFunction
{
def
toolObject
=
nl
.
lumc
.
sasc
.
biopet
.
tools
.
DownloadNcbiAssembly
...
...
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSplitter.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSplitter.scala
View file @
3fa85cef
File moved
biopet-
tools-extensions
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSync.scala
→
biopet-
core
/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/FastqSync.scala
View file @
3fa85cef
...
...
@@ -74,7 +74,7 @@ class FastqSync(val parent: Configurable) extends ToolCommandFunction with Summa
val
regex
=
new
Regex
(
"""Filtered (\d*) reads from first read file.
|Filtered (\d*) reads from second read file.
|Synced
read
files contain (\d*) reads."""
.
stripMargin
,
|Synced files contain (\d*) reads."""
.
stripMargin
,
"R1"
,
"R2"
,
"RL"
...
...
Prev
1
2
3
4
5
…
7
Next
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment