Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
biopet.biopet
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Mirrors
biopet.biopet
Commits
0f475cfa
Commit
0f475cfa
authored
10 years ago
by
Wai Yi Leung
Browse files
Options
Downloads
Patches
Plain Diff
Refactor Yamsvp to new sample/lib model
parent
a283a8f3
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala
+80
-129
80 additions, 129 deletions
...n/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala
with
80 additions
and
129 deletions
public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala
+
80
−
129
View file @
0f475cfa
...
...
@@ -19,13 +19,15 @@
package
nl.lumc.sasc.biopet.pipelines.yamsvp
import
java.io.File
import
nl.lumc.sasc.biopet.core.config.Configurable
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
MultiSampleQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.extensions.Ln
import
nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount
import
nl.lumc.sasc.biopet.extensions.sambamba.
{
SambambaIndex
,
SambambaMerge
,
SambambaMarkdup
}
import
nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
import
nl.lumc.sasc.biopet.extensions.sambamba.
{
SambambaMerge
,
SambambaMarkdup
}
//
import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
import
nl.lumc.sasc.biopet.extensions.svcallers.
{
Breakdancer
,
Delly
,
CleverCaller
}
import
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
...
...
@@ -35,163 +37,112 @@ import org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.function._
import
org.broadinstitute.gatk.queue.engine.JobRunInfo
class
Yamsvp
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
//with MultiSampleQScript {
class
Yamsvp
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
qscript
=>
def
this
()
=
this
(
null
)
var
reference
:
File
=
config
(
"reference"
,
required
=
true
)
var
finalBamFiles
:
List
[
File
]
=
Nil
/*
class LibraryOutput extends AbstractLibraryOutput {
var mappedBamFile: File = _
}
class SampleOutput extends AbstractSampleOutput {
var vcf: Map[String, List[File]] = Map()
var mappedBamFile: File = _
}
*/
override
def
init
()
{
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Output directory is not specified in the config / argument"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
}
def
makeSample
(
id
:
String
)
=
new
Sample
(
id
)
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
def
biopetScript
()
{
// write the pipeline here
// start with QC, alignment, call sambamba, call sv callers, reporting
val
alignmentDir
:
String
=
sampleDir
+
"alignment/"
val
svcallingDir
:
String
=
sampleDir
+
"svcalls/"
// read config and set all parameters for the pipeline
logger
.
info
(
"Starting YAM SV Pipeline"
)
//runSamplesJobs
//
def
makeLibrary
(
id
:
String
)
=
new
Library
(
id
)
class
Library
(
libraryId
:
String
)
extends
AbstractLibrary
(
libraryId
)
{
}
// val runDir: String = alignmentDir + "run_" + libraryId + "/"
override
def
onExecutionDone
(
jobs
:
Map
[
QFunction
,
JobRunInfo
],
success
:
Boolean
)
{
logger
.
info
(
"YAM SV Pipeline has run ......................."
)
}
/*
def runSingleSampleJobs(sampleID: String): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
var outputFiles: Map[String, List[File]] = Map()
var libraryFastqFiles: List[File] = List()
val sampleDir: String = outputDir + sampleID + "/"
val alignmentDir: String = sampleDir + "alignment/"
val
mapping
=
new
Mapping
(
qscript
)
mapping
.
libraryId
=
libraryId
mapping
.
sampleId
=
sampleId
val svcallingDir: String = sampleDir + "svcalls/"
protected
def
addJobs
()
:
Unit
=
{
mapping
.
input_R1
=
config
(
"R1"
,
required
=
true
)
mapping
.
input_R2
=
config
(
"R2"
,
required
=
true
)
mapping
.
outputDir
=
libDir
sampleOutput.libraries = runLibraryJobs(sampleID)
for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
// this is extending the libraryBamfiles list like '~=' in D or .append in Python or .push_back in C++
libraryBamfiles ++= List(libraryOutput.mappedBamFile)
mapping
.
init
mapping
.
biopetScript
qscript
.
addAll
(
mapping
.
functions
)
}
}
val bamFile: File =
if (libraryBamfiles.size == 1) {
// When the sample has only 1 run, make a link in the main alignment directory
val alignmentlink = Ln(this, libraryBamfiles.head,
alignmentDir + sampleID + ".merged.bam", true)
add(alignmentlink, isIntermediate=true)
protected
def
addJobs
()
:
Unit
=
{
addLibsJobs
()
val
libraryBamfiles
=
libraries
.
map
(
_
.
_2
.
mapping
.
finalBamFile
).
toList
val
bamFile
:
File
=
if
(
libraryBamfiles
.
size
==
1
)
{
val
alignmentlink
=
Ln
(
qscript
,
libraryBamfiles
.
head
,
alignmentDir
+
sampleId
+
".merged.bam"
,
true
)
alignmentlink
.
isIntermediate
=
true
add
(
alignmentlink
)
alignmentlink
.
out
}
else
if
(
libraryBamfiles
.
size
>
1
)
{
val mergeSamFiles = new SambambaMerge(
this
)
val
mergeSamFiles
=
new
SambambaMerge
(
qscript
)
mergeSamFiles
.
input
=
libraryBamfiles
mergeSamFiles.output = alignmentDir + sampleID + ".merged.bam"
add(mergeSamFiles, isIntermediate=true)
mergeSamFiles
.
output
=
sampleDir
+
sampleId
+
".merged.bam"
mergeSamFiles
.
isIntermediate
=
true
add
(
mergeSamFiles
)
mergeSamFiles
.
output
}
else
null
val bamMarkDup = SambambaMarkdup(
this
, bamFile)
add(bamMarkDup)
val
bamMarkDup
=
SambambaMarkdup
(
qscript
,
bamFile
)
add
(
bamMarkDup
)
addAll(BamMetrics(
this
, bamMarkDup.output, alignmentDir + "metrics
/
").functions)
addAll
(
BamMetrics
(
qscript
,
bamMarkDup
.
output
,
alignmentDir
+
"metrics"
+
File
.
separator
).
functions
)
// create an IGV TDF file
val tdfCount = IGVToolsCount(
this
, bamMarkDup.output, config("genomename", default = "hg19"))
add(tdfCount)
// create an IGV TDF file
val
tdfCount
=
IGVToolsCount
(
qscript
,
bamMarkDup
.
output
,
config
(
"genome
_
name"
,
default
=
"hg19"
))
add
(
tdfCount
)
/// bamfile will be used as input for the SV callers. First run Clever
// val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
/// bamfile will be used as input for the SV callers. First run Clever
// val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
val cleverDir = svcallingDir + sampleID + ".clever/"
val clever = CleverCaller(this, bamMarkDup.output, this.reference, svcallingDir, cleverDir)
sampleOutput.vcf += ("clever" -> List(clever.outputvcf))
add(clever)
val
cleverDir
=
svcallingDir
+
sampleId
+
".clever/"
val
clever
=
CleverCaller
(
qscript
,
bamMarkDup
.
output
,
qscript
.
reference
,
svcallingDir
,
cleverDir
)
add
(
clever
)
val clever_vcf = Ln(
this
, clever.outputvcf, svcallingDir + sampleI
D
+ ".clever.vcf", relative = true)
add(clever_vcf)
val
clever_vcf
=
Ln
(
qscript
,
clever
.
outputvcf
,
svcallingDir
+
sampleI
d
+
".clever.vcf"
,
relative
=
true
)
add
(
clever_vcf
)
val breakdancerDir = svcallingDir + sampleID + ".breakdancer/"
val breakdancer = Breakdancer(this, bamMarkDup.output, this.reference, breakdancerDir)
sampleOutput.vcf += ("breakdancer" -> List(breakdancer.outputvcf))
addAll(breakdancer.functions)
val
breakdancerDir
=
svcallingDir
+
sampleId
+
".breakdancer/"
val
breakdancer
=
Breakdancer
(
qscript
,
bamMarkDup
.
output
,
qscript
.
reference
,
breakdancerDir
)
addAll
(
breakdancer
.
functions
)
val bd_vcf = Ln(
this
, breakdancer.outputvcf, svcallingDir + sampleI
D
+ ".breakdancer.vcf", relative = true)
add(bd_vcf)
val
bd_vcf
=
Ln
(
qscript
,
breakdancer
.
outputvcf
,
svcallingDir
+
sampleI
d
+
".breakdancer.vcf"
,
relative
=
true
)
add
(
bd_vcf
)
val dellyDir = svcallingDir + sampleID + ".delly/"
val delly = Delly(this, bamMarkDup.output, dellyDir)
sampleOutput.vcf += ("delly" -> List(delly.outputvcf))
addAll(delly.functions)
val
dellyDir
=
svcallingDir
+
sampleId
+
".delly/"
val
delly
=
Delly
(
qscript
,
bamMarkDup
.
output
,
dellyDir
)
addAll
(
delly
.
functions
)
val
delly_vcf
=
Ln
(
qscript
,
delly
.
outputvcf
,
svcallingDir
+
sampleId
+
".delly.vcf"
,
relative
=
true
)
add
(
delly_vcf
)
// for pindel we should use per library config collected into one config file
// val pindelDir = svcallingDir + sampleID + ".pindel/"
// val pindel = Pindel(qscript, analysisBam, this.reference, pindelDir)
// sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
// addAll(pindel.functions)
//
// val pindel_vcf = Ln(qscript, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true)
// add(pindel_vcf)
//
}
}
val delly_vcf = Ln(this, delly.outputvcf, svcallingDir + sampleID + ".delly.vcf", relative = true)
add(delly_vcf)
def
init
()
{
}
// for pindel we should use per library config collected into one config file
// val pindelDir = svcallingDir + sampleID + ".pindel/"
// val pindel = Pindel(this, analysisBam, this.reference, pindelDir)
// sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
// addAll(pindel.functions)
//
// val pindel_vcf = Ln(this, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true)
// add(pindel_vcf)
//
return sampleOutput
def
biopetScript
()
{
logger
.
info
(
"Starting YAM SV Pipeline"
)
addSamplesJobs
}
// Called for each run from a sample
def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = {
val libraryOutput = new LibraryOutput
val alignmentDir: String = outputDir + sampleID + "/alignment/"
val runDir: String = alignmentDir + "run_" + libraryId + "/"
if (config.contains("R1")) {
val mapping = new Mapping(this)
// TODO: check and test config[aligner] in json
// yamsvp/aligner -> value
// this setting causes error if not defined?
mapping.aligner = config("aligner", default = "bwa")
mapping.skipFlexiprep = false
mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
mapping.input_R1 = config("R1")
mapping.input_R2 = config("R2")
mapping.paired = (mapping.input_R2 != null)
mapping.RGLB = libraryId
mapping.RGSM = sampleID
mapping.RGPL = config("PL")
mapping.RGPU = config("PU")
mapping.RGCN = config("CN")
mapping.outputDir = runDir
mapping.init
mapping.biopetScript
addAll(mapping.functions)
// start sambamba dedup
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryId)
return libraryOutput
// logger.debug(outputFiles)
// return outputFiles
override
def
onExecutionDone
(
jobs
:
Map
[
QFunction
,
JobRunInfo
],
success
:
Boolean
)
{
logger
.
info
(
"YAM SV Pipeline has run ......................."
)
}
*/
}
object
Yamsvp
extends
PipelineCommand
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment