Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
fcaea52d
Commit
fcaea52d
authored
Jul 03, 2014
by
Peter van 't Hof
Browse files
Added some picard tools to biopet
parent
4deb4b69
Changes
5
Hide whitespace changes
Inline
Side-by-side
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CalculateHsMetrics.scala
0 → 100644
View file @
fcaea52d
package
nl.lumc.sasc.biopet.function.picard
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.sting.commandline.
{
Argument
,
Input
,
Output
}
/**
 * Command-line function that launches the Picard main class
 * `net.sf.picard.analysis.directed.CalculateHsMetrics`.
 *
 * Every annotated field is rendered as a `KEY=value` token by `commandLine`,
 * appended to whatever the [[Picard]] base class already emits.
 *
 * @param root the [[Configurable]] this function is constructed with
 *             (NOTE(review): presumably the parent used for config lookups - confirm)
 */
class CalculateHsMetrics(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.analysis.directed.CalculateHsMetrics"

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: File = _

  @Input(doc = "BAIT_INTERVALS", required = true)
  var baitIntervals: File = _

  @Input(doc = "TARGET_INTERVALS", required = true)
  var targetIntervals: File = _

  @Output(doc = "The output file to write statistics to", required = true)
  var output: File = _

  @Output(doc = "PER_TARGET_COVERAGE", required = false)
  var perTargetCoverage: File = _

  // Read from the "reference" config key; falls back to an empty string.
  @Argument(doc = "Reference file", required = false)
  var reference: File = config("reference", "")

  // Read from the "metricaccumulationlevel" config key; falls back to an empty list.
  @Argument(doc = "METRIC_ACCUMULATION_LEVEL", required = false)
  var metricAccumulationLevel: List[String] = config("metricaccumulationlevel", List())

  @Argument(doc = "BAIT_SET_NAME", required = false)
  var baitSetName: String = _

  /** Base Picard command line followed by this tool's arguments, in a fixed order. */
  override def commandLine: String = Seq(
    super.commandLine,
    required("INPUT=", input, spaceSeparated = false),
    required("OUTPUT=", output, spaceSeparated = false),
    optional("REFERENCE_SEQUENCE=", reference, spaceSeparated = false),
    repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated = false),
    required("BAIT_INTERVALS=", baitIntervals, spaceSeparated = false),
    required("TARGET_INTERVALS=", targetIntervals, spaceSeparated = false),
    optional("PER_TARGET_COVERAGE=", perTargetCoverage, spaceSeparated = false),
    optional("BAIT_SET_NAME=", baitSetName, spaceSeparated = false)
  ).mkString
}
/** Factory for [[CalculateHsMetrics]] jobs with output paths derived from the input file name. */
object CalculateHsMetrics {
  /**
   * Creates a job for `input` whose result files live in `outputDir`.
   *
   * The output names are the input file name with a trailing ".bam" removed,
   * followed by ".capmetrics" (metrics) and ".per_target_coverage" (per-target coverage).
   *
   * @param root            passed to the [[CalculateHsMetrics]] constructor
   * @param input           file to analyse
   * @param baitIntervals   bait interval file
   * @param targetIntervals target interval file
   * @param outputDir       directory in which both output files are placed
   * @return the configured job
   */
  def apply(root: Configurable, input: File, baitIntervals: File, targetIntervals: File,
            outputDir: String): CalculateHsMetrics = {
    val job = new CalculateHsMetrics(root)
    job.input = input
    job.baitIntervals = baitIntervals
    job.targetIntervals = targetIntervals

    // Both outputs share the same base name.
    val baseName = input.getName.stripSuffix(".bam")
    job.output = new File(outputDir, baseName + ".capmetrics")
    job.perTargetCoverage = new File(outputDir, baseName + ".per_target_coverage")
    job
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectGcBiasMetrics.scala
0 → 100644
View file @
fcaea52d
package
nl.lumc.sasc.biopet.function.picard
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.sting.commandline.
{
Argument
,
Input
,
Output
}
/**
 * Command-line function that launches the Picard main class
 * `net.sf.picard.analysis.CollectGcBiasMetrics`.
 *
 * @param root the [[Configurable]] this function is constructed with
 *             (NOTE(review): presumably the parent used for config lookups - confirm)
 */
class CollectGcBiasMetrics(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.analysis.CollectGcBiasMetrics"

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: Seq[File] = Nil

  @Output(doc = "The output file to write statistics to", required = true)
  var output: File = _

  // Filled in by afterGraph when left unset.
  @Output(doc = "Output chart", required = false)
  var outputChart: File = _

  @Output(doc = "Output summary", required = false)
  var outputSummary: File = _

  @Argument(doc = "Reference file", required = false)
  var reference: File = config("reference", "")

  @Argument(doc = "Window size", required = false)
  var windowSize: Int = config("windowsize", 100)

  @Argument(doc = "MINIMUM_GENOME_FRACTION", required = false)
  var minGenomeFraction: Double = config("mingenomefraction", 1.0E-5)

  @Argument(doc = "ASSUME_SORTED", required = false)
  var assumeSorted: Boolean = config("assumesorted", false)

  // NOTE(review): field name and config key are spelled "Sequinced"; kept as-is because both are
  // visible to callers and config files.
  @Argument(doc = "IS_BISULFITE_SEQUENCED", required = false)
  var isBisulfiteSequinced: Boolean = config("isbisulfitesequinced", false)

  /** Defaults the chart output to the metrics output path with ".pdf" appended. */
  override def afterGraph: Unit = {
    if (outputChart == null) {
      outputChart = new File(output + ".pdf")
    }
    //require(reference.exists)
  }

  /** Base Picard command line followed by this tool's arguments, in a fixed order. */
  override def commandLine: String = Seq(
    super.commandLine,
    repeat("INPUT=", input, spaceSeparated = false),
    required("OUTPUT=", output, spaceSeparated = false),
    optional("CHART_OUTPUT=", outputChart, spaceSeparated = false),
    required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false),
    optional("SUMMARY_OUTPUT=", outputSummary, spaceSeparated = false),
    optional("WINDOW_SIZE=", windowSize, spaceSeparated = false),
    optional("MINIMUM_GENOME_FRACTION=", minGenomeFraction, spaceSeparated = false),
    conditional(assumeSorted, "ASSUME_SORTED=TRUE"),
    conditional(isBisulfiteSequinced, "IS_BISULFITE_SEQUENCED=TRUE")
  ).mkString
}
/** Factory for [[CollectGcBiasMetrics]] jobs with an output path derived from the input file name. */
object CollectGcBiasMetrics {
  /**
   * Creates a job that analyses a single `input` file.
   *
   * The metrics file is placed in `outputDir` and named after the input file,
   * with a trailing ".bam" replaced by ".gcbiasmetrics".
   *
   * @param root      passed to the [[CollectGcBiasMetrics]] constructor
   * @param input     file to analyse; appended to the job's input list
   * @param outputDir directory in which the metrics file is placed
   * @return the configured job
   */
  def apply(root: Configurable, input: File, outputDir: String): CollectGcBiasMetrics = {
    val job = new CollectGcBiasMetrics(root)
    job.input = job.input :+ input
    val metricsName = input.getName.stripSuffix(".bam") + ".gcbiasmetrics"
    job.output = new File(outputDir, metricsName)
    job
  }
}
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/CollectInsertSizeMetrics.scala
0 → 100644
View file @
fcaea52d
package
nl.lumc.sasc.biopet.function.picard
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.sting.commandline.
{
Argument
,
Input
,
Output
}
/**
 * Command-line function that launches the Picard main class
 * `net.sf.picard.analysis.CollectInsertSizeMetrics`.
 *
 * @param root the [[Configurable]] this function is constructed with
 *             (NOTE(review): presumably the parent used for config lookups - confirm)
 */
class CollectInsertSizeMetrics(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.analysis.CollectInsertSizeMetrics"

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: File = _

  @Output(doc = "The output file to write statistics to", required = true)
  var output: File = _

  // Filled in by afterGraph when left unset.
  @Output(doc = "Output histogram", required = true)
  var outputHistogram: File = _

  @Argument(doc = "Reference file", required = false)
  var reference: File = config("reference", "")

  @Argument(doc = "DEVIATIONS", required = false)
  var deviations: Double = config("deviations", 10.0)

  @Argument(doc = "MINIMUM_PCT", required = false)
  var minPct: Double = config("minpct", 0.05)

  @Argument(doc = "ASSUME_SORTED", required = false)
  var assumeSorted: Boolean = config("assumesorted", false)

  // Fix: this previously read the "metricaccumulationlevel" key (copy-paste from the field below),
  // so STOP_AFTER could never be configured on its own.
  @Argument(doc = "STOP_AFTER", required = false)
  var stopAfter: Long = config("stopafter", 0)

  @Argument(doc = "METRIC_ACCUMULATION_LEVEL", required = false)
  var metricAccumulationLevel: List[String] = config("metricaccumulationlevel", List())

  @Argument(doc = "HISTOGRAM_WIDTH", required = false)
  var histogramWidth: Int = config("histogramWidth", 0)

  /** Defaults the histogram output to the metrics output path with ".pdf" appended. */
  override def afterGraph: Unit = {
    if (outputHistogram == null) outputHistogram = new File(output + ".pdf")
    //require(reference.exists)
  }

  /**
   * Base Picard command line followed by this tool's arguments.
   *
   * STOP_AFTER and HISTOGRAM_WIDTH are only emitted when their value is greater than zero.
   */
  override def commandLine = super.commandLine +
    required("INPUT=", input, spaceSeparated = false) +
    required("OUTPUT=", output, spaceSeparated = false) +
    optional("HISTOGRAM_FILE=", outputHistogram, spaceSeparated = false) +
    required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) +
    optional("DEVIATIONS=", deviations, spaceSeparated = false) +
    // Fix: minPct was declared and configurable but never reached the command line.
    optional("MINIMUM_PCT=", minPct, spaceSeparated = false) +
    repeat("METRIC_ACCUMULATION_LEVEL=", metricAccumulationLevel, spaceSeparated = false) +
    (if (stopAfter > 0) optional("STOP_AFTER=", stopAfter, spaceSeparated = false) else "") +
    (if (histogramWidth > 0) optional("HISTOGRAM_WIDTH=", histogramWidth, spaceSeparated = false) else "") +
    conditional(assumeSorted, "ASSUME_SORTED=TRUE")
}
/** Factory for [[CollectInsertSizeMetrics]] jobs with an output path derived from the input file name. */
object CollectInsertSizeMetrics {
  /**
   * Creates a job for `input`.
   *
   * The metrics file is placed in `outputDir` and named after the input file,
   * with a trailing ".bam" replaced by ".insertsizemetrics".
   *
   * @param root      passed to the [[CollectInsertSizeMetrics]] constructor
   * @param input     file to analyse
   * @param outputDir directory in which the metrics file is placed
   * @return the configured job
   */
  def apply(root: Configurable, input: File, outputDir: String): CollectInsertSizeMetrics = {
    val job = new CollectInsertSizeMetrics(root)
    job.input = input
    val metricsName = input.getName.stripSuffix(".bam") + ".insertsizemetrics"
    job.output = new File(outputDir, metricsName)
    job
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/MarkDuplicates.scala
0 → 100644
View file @
fcaea52d
package
nl.lumc.sasc.biopet.function.picard
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
org.broadinstitute.sting.commandline.
{
Argument
,
Input
,
Output
}
/**
 * Command-line function that launches the Picard main class
 * `net.sf.picard.sam.MarkDuplicates`.
 *
 * @param root the [[Configurable]] this function is constructed with
 *             (NOTE(review): presumably the parent used for config lookups - confirm)
 */
class MarkDuplicates(val root: Configurable) extends Picard {
  javaMainClass = "net.sf.picard.sam.MarkDuplicates"

  /** Returns the configured value for `key`, or `null` when the key is not configured. */
  private def stringOrNull(key: String): String =
    if (configContains(key)) config(key) else null

  @Input(doc = "The input SAM or BAM files to analyze. Must be coordinate sorted.", required = true)
  var input: List[File] = Nil

  @Output(doc = "The output file to bam file to", required = true)
  var output: File = _

  @Output(doc = "The output file to write statistics to", required = true)
  var outputMetrics: File = _

  @Argument(doc = "PROGRAM_RECORD_ID", required = false)
  var programRecordId: String = stringOrNull("programrecordid")

  @Argument(doc = "PROGRAM_GROUP_VERSION", required = false)
  var programGroupVersion: String = stringOrNull("programgroupversion")

  @Argument(doc = "PROGRAM_GROUP_COMMAND_LINE", required = false)
  var programGroupCommandLine: String = stringOrNull("programgroupcommandline")

  @Argument(doc = "PROGRAM_GROUP_NAME", required = false)
  var programGroupName: String = stringOrNull("programgroupname")

  @Argument(doc = "COMMENT", required = false)
  var comment: String = stringOrNull("comment")

  @Argument(doc = "REMOVE_DUPLICATES", required = false)
  var removeDuplicates: Boolean = config("removeduplicates", false)

  @Argument(doc = "ASSUME_SORTED", required = false)
  var assumeSorted: Boolean = config("assumesorted", false)

  @Argument(doc = "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP", required = false)
  var maxSequencesForDiskReadEndsMap: Int = config("maxSequencesForDiskReadEndsMap", 50000)

  @Argument(doc = "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP", required = false)
  var maxFileHandlesForReadEndsMap: Int = config("maxFileHandlesForReadEndsMap", 8000)

  @Argument(doc = "SORTING_COLLECTION_SIZE_RATIO", required = false)
  var sortingCollectionSizeRatio: Double = config("sortingCollectionSizeRatio", 0.25)

  @Argument(doc = "READ_NAME_REGEX", required = false)
  var readNameRegex: String = stringOrNull("readNameRegex")

  @Argument(doc = "OPTICAL_DUPLICATE_PIXEL_DISTANCE", required = false)
  var opticalDuplicatePixelDistance: Int = config("opticalDuplicatePixelDistance", 100)

  /**
   * Base Picard command line followed by this tool's arguments, in a fixed order.
   *
   * The four numeric tuning options are only emitted when their value is greater than zero.
   */
  override def commandLine: String = Seq(
    super.commandLine,
    repeat("INPUT=", input, spaceSeparated = false),
    required("OUTPUT=", output, spaceSeparated = false),
    required("METRICS_FILE=", outputMetrics, spaceSeparated = false),
    optional("PROGRAM_RECORD_ID=", programRecordId, spaceSeparated = false),
    optional("PROGRAM_GROUP_VERSION=", programGroupVersion, spaceSeparated = false),
    optional("PROGRAM_GROUP_COMMAND_LINE=", programGroupCommandLine, spaceSeparated = false),
    optional("PROGRAM_GROUP_NAME=", programGroupName, spaceSeparated = false),
    optional("COMMENT=", comment, spaceSeparated = false),
    conditional(removeDuplicates, "REMOVE_DUPLICATES=TRUE"),
    conditional(assumeSorted, "ASSUME_SORTED=TRUE"),
    if (maxSequencesForDiskReadEndsMap > 0)
      optional("MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=", maxSequencesForDiskReadEndsMap, spaceSeparated = false)
    else "",
    if (maxFileHandlesForReadEndsMap > 0)
      optional("MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=", maxFileHandlesForReadEndsMap, spaceSeparated = false)
    else "",
    if (sortingCollectionSizeRatio > 0)
      optional("SORTING_COLLECTION_SIZE_RATIO=", sortingCollectionSizeRatio, spaceSeparated = false)
    else "",
    optional("READ_NAME_REGEX=", readNameRegex, spaceSeparated = false),
    if (opticalDuplicatePixelDistance > 0)
      optional("OPTICAL_DUPLICATE_PIXEL_DISTANCE=", opticalDuplicatePixelDistance, spaceSeparated = false)
    else ""
  ).mkString
}
/** Factories for [[MarkDuplicates]] jobs. */
object MarkDuplicates {
  /**
   * Creates a job whose outputs are placed in `outputDir` and named after the FIRST input file.
   *
   * With a trailing ".bam" removed from that name, the BAM output gets the suffix ".dedup.bam"
   * and the metrics file the suffix ".dedup.metrics". Calls `input.head`, so an empty `input`
   * list throws.
   *
   * @param root      passed to the [[MarkDuplicates]] constructor
   * @param input     files to process
   * @param outputDir directory in which both output files are placed
   * @return the configured job
   */
  def apply(root: Configurable, input: List[File], outputDir: String): MarkDuplicates = {
    val job = new MarkDuplicates(root)
    job.input = input
    job.output = new File(outputDir, input.head.getName.stripSuffix(".bam") + ".dedup.bam")
    job.outputMetrics = new File(outputDir, input.head.getName.stripSuffix(".bam") + ".dedup.metrics")
    job
  }

  /**
   * Creates a job that writes to an explicit `output` file.
   *
   * The metrics file is placed in the parent directory of `output` and named after it,
   * with a trailing ".bam" replaced by ".metrics".
   *
   * @param root   passed to the [[MarkDuplicates]] constructor
   * @param input  files to process
   * @param output BAM file to write
   * @return the configured job
   */
  def apply(root: Configurable, input: List[File], output: File): MarkDuplicates = {
    val job = new MarkDuplicates(root)
    job.input = input
    job.output = output
    val metricsName = output.getName.stripSuffix(".bam") + ".metrics"
    job.outputMetrics = new File(output.getParent, metricsName)
    job
  }
}
\ No newline at end of file
biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/picard/Picard.scala
0 → 100644
View file @
fcaea52d
package
nl.lumc.sasc.biopet.function.picard
import
nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import
org.broadinstitute.sting.commandline._
/**
 * Shared base for the Picard tool wrappers in this package.
 *
 * Holds the options common to the tools and appends them to the command line
 * produced by [[BiopetJavaCommandLineFunction]]. All defaults are looked up with
 * "picard" as the third argument to `config`.
 */
abstract class Picard extends BiopetJavaCommandLineFunction {
  @Argument(doc = "VERBOSITY", required = false)
  var verbosity: String = config("verbosity", "INFO", "picard")

  @Argument(doc = "QUIET", required = false)
  var quiet: Boolean = config("quiet", false, "picard")

  @Argument(doc = "VALIDATION_STRINGENCY", required = false)
  var stringency: String = config("validationstringency", "STRICT", "picard")

  @Argument(doc = "COMPRESSION_LEVEL", required = false)
  var compression: Int = config("compressionlevel", 5, "picard")

  @Argument(doc = "MAX_RECORDS_IN_RAM", required = false)
  var maxRecordsInRam: Int = config("maxrecordsinram", 500000, "picard")

  @Argument(doc = "CREATE_INDEX", required = false)
  var createIndex: Boolean = config("createindex", true, "picard")

  @Argument(doc = "CREATE_MD5_FILE", required = false)
  var createMd5: Boolean = config("createmd5", false, "picard")

  /** Command used to query the tool version: the Java invocation followed by " -h". */
  override def versionCommand = Seq(executeble, javaOpts, javaExecutable).mkString(" ") + " -h"

  // Captures the text following "Version: ".
  override val versionRegex = """Version: (.*)""".r

  // Exit codes 0 and 1 are both listed for the version command.
  override val versionExitcode = List(0, 1)

  override val defaultVmem = "8G"
  memoryLimit = Option(5.0)

  /** Parent command line followed by TMP_DIR and the shared Picard options, in a fixed order. */
  override def commandLine: String = Seq(
    super.commandLine,
    required("TMP_DIR=" + jobTempDir),
    optional("VERBOSITY=", verbosity, spaceSeparated = false),
    conditional(quiet, "QUIET=TRUE"),
    optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated = false),
    optional("COMPRESSION_LEVEL=", compression, spaceSeparated = false),
    optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false),
    conditional(createIndex, "CREATE_INDEX=TRUE"),
    conditional(createMd5, "CREATE_MD5_FILE=TRUE")
  ).mkString
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment