Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
biopet.biopet
Manage
Activity
Members
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Mirrors
biopet.biopet
Commits
7734e7d3
Commit
7734e7d3
authored
10 years ago
by
Sander van der Zeeuw
Browse files
Options
Downloads
Patches
Plain Diff
Added changes for new sampling handling
parent
cfd06a1e
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala
+61
-76
61 additions, 76 deletions
.../main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala
with
61 additions
and
76 deletions
public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala
+
61
−
76
View file @
7734e7d3
...
...
@@ -15,13 +15,15 @@
*/
package
nl.lumc.sasc.biopet.pipelines.carp
import
java.io.File
import
nl.lumc.sasc.biopet.extensions.Ln
import
nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak
import
nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.utils.commandline.
{
Argument
,
Input
}
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
}
import
nl.lumc.sasc.biopet.extensions.aligners.
{
Bwa
,
Star
,
Bowtie
,
Stampy
}
import
nl.lumc.sasc.biopet.core._
import
nl.lumc.sasc.biopet.core.config._
import
nl.lumc.sasc.biopet.pipelines.mapping.Mapping
...
...
@@ -32,14 +34,60 @@ import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
* This pipeline performs QC,mapping and peak calling
*/
class
Carp
(
val
root
:
Configurable
)
extends
QScript
with
MultiSampleQScript
{
qscript
=>
def
this
()
=
this
(
null
)
class
LibraryOutput
extends
AbstractLibraryOutput
{
var
mappedBamFile
:
File
=
_
}
override
def
defaults
=
ConfigUtils
.
mergeMaps
(
Map
(
"mapping"
->
Map
(
"skip_markduplicates"
->
true
)
),
super
.
defaults
)
def
makeSample
(
id
:
String
)
=
new
Sample
(
id
)
class
Sample
(
sampleId
:
String
)
extends
AbstractSample
(
sampleId
)
{
def
makeLibrary
(
id
:
String
)
=
new
Library
(
id
)
class
Library
(
libraryId
:
String
)
extends
AbstractLibrary
(
libraryId
)
{
val
mapping
=
new
Mapping
(
qscript
)
def
addJobs
()
:
Unit
=
{
if
(
config
.
contains
(
"R1"
))
{
mapping
.
input_R1
=
config
(
"R1"
)
if
(
config
.
contains
(
"R2"
))
mapping
.
input_R2
=
config
(
"R2"
)
mapping
.
libraryId
=
libraryId
mapping
.
sampleId
=
sampleId
mapping
.
outputDir
=
libDir
mapping
.
init
mapping
.
biopetScript
addAll
(
mapping
.
functions
)
}
else
logger
.
error
(
"Sample: "
+
sampleId
+
": No R1 found for library: "
+
libraryId
)
}
}
val
bamFile
=
new
File
(
sampleDir
+
sampleId
+
".bam"
)
val
controls
:
List
[
String
]
=
config
(
"control"
,
default
=
Nil
)
def
addJobs
()
:
Unit
=
{
addLibsJobs
()
val
bamFiles
=
libraries
.
map
(
_
.
_2
.
mapping
.
finalBamFile
).
toList
if
(
bamFiles
.
length
==
1
)
{
add
(
Ln
(
qscript
,
bamFiles
.
head
,
bamFile
))
val
oldIndex
=
new
File
(
bamFiles
.
head
.
getAbsolutePath
.
stripSuffix
(
".bam"
)
+
".bai"
)
val
newIndex
=
new
File
(
bamFile
.
getAbsolutePath
.
stripSuffix
(
".bam"
)
+
".bai"
)
add
(
Ln
(
qscript
,
oldIndex
,
newIndex
))
}
else
if
(
bamFiles
.
length
>
1
)
{
val
merge
=
new
MergeSamFiles
(
qscript
)
merge
.
input
=
bamFiles
merge
.
sortOrder
=
"coordinate"
merge
.
output
=
bamFile
add
(
merge
)
}
class
SampleOutput
extends
AbstractSampleOutput
{
var
mappedBamFile
:
File
=
_
val
macs2
=
new
Macs2CallPeak
(
qscript
)
macs2
.
treatment
=
bamFile
macs2
.
name
=
sampleId
macs2
.
outputdir
=
sampleDir
+
"macs2/"
+
macs2
.
name
+
"/"
add
(
macs2
)
}
}
def
init
()
{
...
...
@@ -52,84 +100,21 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript {
// Third step is calling peaks on the bam files produced with the mapping pipeline, this will be done with MACS2
logger
.
info
(
"Starting CArP pipeline"
)
runSamplesJobs
for
(
sample
<-
getSamples
)
{
val
controls
:
List
[
String
]
=
config
(
"control"
,
sample
=
sample
,
default
=
Nil
)
addSamplesJobs
for
(
control
<-
controls
)
{
if
(!
getSamples
.
exists
(
_
==
control
))
for
((
sampleId
,
sample
)
<-
samples
)
{
for
(
control
<-
sample
.
controls
)
{
if
(!
samples
.
exists
(
_
==
control
))
throw
new
IllegalStateException
(
"For sample: "
+
sample
+
" this control: "
+
control
+
" does not exist"
)
val
macs2
=
new
Macs2CallPeak
(
this
)
macs2
.
treatment
=
sample
sOutput
(
sample
).
mappedB
amFile
macs2
.
control
=
samples
Output
(
control
).
mappedB
amFile
macs2
.
treatment
=
sample
.
b
amFile
macs2
.
control
=
samples
(
control
).
b
amFile
macs2
.
name
=
sample
+
"_VS_"
+
control
macs2
.
outputdir
=
globalSampleDir
+
sample
+
"/"
+
"macs2/"
+
macs2
.
name
+
"/"
macs2
.
outputdir
=
sample
.
sample
Dir
+
"/"
+
"macs2/"
+
macs2
.
name
+
"/"
add
(
macs2
)
}
}
}
def
runSingleSampleJobs
(
sampleConfig
:
Map
[
String
,
Any
])
:
SampleOutput
=
{
val
sampleOutput
=
new
SampleOutput
val
sampleID
:
String
=
getCurrentSample
val
sampleDir
=
globalSampleDir
+
sampleID
+
"/"
sampleOutput
.
libraries
=
runLibraryJobs
(
sampleConfig
)
val
bamfiles
=
sampleOutput
.
libraries
.
map
(
_
.
_2
.
mappedBamFile
).
toList
sampleOutput
.
mappedBamFile
=
new
File
(
sampleDir
+
sampleID
+
".bam"
)
if
(
bamfiles
.
length
==
1
)
{
add
(
Ln
(
this
,
bamfiles
.
head
,
sampleOutput
.
mappedBamFile
))
val
oldIndex
=
new
File
(
bamfiles
.
head
.
getAbsolutePath
.
stripSuffix
(
".bam"
)
+
".bai"
)
val
newIndex
=
new
File
(
sampleOutput
.
mappedBamFile
.
getAbsolutePath
.
stripSuffix
(
".bam"
)
+
".bai"
)
add
(
Ln
(
this
,
oldIndex
,
newIndex
))
}
else
if
(
bamfiles
.
length
>
1
)
{
val
merge
=
new
MergeSamFiles
(
this
)
merge
.
input
=
bamfiles
merge
.
sortOrder
=
"coordinate"
merge
.
output
=
sampleOutput
.
mappedBamFile
add
(
merge
)
}
val
macs2
=
new
Macs2CallPeak
(
this
)
macs2
.
treatment
=
sampleOutput
.
mappedBamFile
macs2
.
name
=
sampleID
macs2
.
outputdir
=
sampleDir
+
"macs2/"
+
macs2
.
name
+
"/"
add
(
macs2
)
return
sampleOutput
}
def
runSingleLibraryJobs
(
runConfig
:
Map
[
String
,
Any
],
sampleConfig
:
Map
[
String
,
Any
])
:
LibraryOutput
=
{
val
libraryOutput
=
new
LibraryOutput
val
runID
:
String
=
getCurrentLibrary
val
sampleID
:
String
=
getCurrentSample
val
runDir
:
String
=
globalSampleDir
+
sampleID
+
"/run_"
+
runID
+
"/"
if
(
runConfig
.
contains
(
"R1"
))
{
val
mapping
=
new
Mapping
(
this
)
mapping
.
skipMarkduplicates
=
config
(
"skip_markduplicates"
,
default
=
true
)
// we do the dedup marking using Sambamba
mapping
.
input_R1
=
new
File
(
runConfig
(
"R1"
).
toString
)
if
(
runConfig
.
contains
(
"R2"
))
mapping
.
input_R2
=
new
File
(
runConfig
(
"R2"
).
toString
)
mapping
.
RGLB
=
runConfig
(
"ID"
).
toString
mapping
.
RGSM
=
sampleConfig
(
"ID"
).
toString
if
(
runConfig
.
contains
(
"PL"
))
mapping
.
RGPL
=
runConfig
(
"PL"
).
toString
if
(
runConfig
.
contains
(
"PU"
))
mapping
.
RGPU
=
runConfig
(
"PU"
).
toString
if
(
runConfig
.
contains
(
"CN"
))
mapping
.
RGCN
=
runConfig
(
"CN"
).
toString
mapping
.
outputDir
=
runDir
mapping
.
init
mapping
.
biopetScript
addAll
(
mapping
.
functions
)
libraryOutput
.
mappedBamFile
=
mapping
.
outputFiles
(
"finalBamFile"
)
}
else
this
.
logger
.
error
(
"Sample: "
+
sampleID
+
": No R1 found for run: "
+
runConfig
)
return
libraryOutput
}
}
object
Carp
extends
PipelineCommand
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment