Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
eac4cd7b
Commit
eac4cd7b
authored
Nov 22, 2014
by
Peter van 't Hof
Browse files
Added mapping as separated artifact
parent
13430b2e
Changes
3
Hide whitespace changes
Inline
Side-by-side
public/mapping/.gitignore
0 → 100644
View file @
eac4cd7b
/target/
\ No newline at end of file
public/mapping/pom.xml
0 → 100644
View file @
eac4cd7b
<project
xmlns=
"http://maven.apache.org/POM/4.0.0"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=
"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
Mapping
</artifactId>
<version>
0.2.0-DEV
</version>
<packaging>
jar
</packaging>
<inceptionYear>
2014
</inceptionYear>
<name>
Mapping
</name>
<url>
http://maven.apache.org
</url>
<properties>
<project.build.sourceEncoding>
UTF-8
</project.build.sourceEncoding>
<sting.unpack.phase>
prepare-package
</sting.unpack.phase>
<sting.shade.phase>
package
</sting.shade.phase>
<app.main.class>
nl.lumc.sasc.biopet.core.BiopetExecutable
</app.main.class>
</properties>
<dependencies>
<dependency>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
BiopetFramework
</artifactId>
<version>
0.2.0-DEV
</version>
</dependency>
<dependency>
<groupId>
nl.lumc.sasc
</groupId>
<artifactId>
Flexiprep
</artifactId>
<version>
0.2.0-DEV
</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>
src/main/resources
</directory>
<includes>
<include>
**/*
</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
<groupId>
org.scala-tools
</groupId>
<artifactId>
maven-scala-plugin
</artifactId>
<version>
2.15.2
</version>
<executions>
<execution>
<id>
scala-compile
</id>
<goals>
<goal>
compile
</goal>
<goal>
testCompile
</goal>
</goals>
<configuration>
<args>
<arg>
-dependencyfile
</arg>
<arg>
${project.build.directory}/.scala_dependencies
</arg>
<arg>
-deprecation
</arg>
<arg>
-feature
</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-jar-plugin
</artifactId>
<version>
2.5
</version>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>
true
</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>
true
</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>
org.apache.maven.plugins
</groupId>
<artifactId>
maven-compiler-plugin
</artifactId>
<version>
2.3.2
</version>
<configuration>
<showDeprecation>
true
</showDeprecation>
</configuration>
</plugin>
</plugins>
</build>
</project>
public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
0 → 100644
View file @
eac4cd7b
package
nl.lumc.sasc.biopet.pipelines.mapping
import
nl.lumc.sasc.biopet.core.config.Configurable
import
java.io.File
import
java.util.Date
import
nl.lumc.sasc.biopet.core.
{
BiopetQScript
,
PipelineCommand
}
import
nl.lumc.sasc.biopet.tools.FastqSplitter
import
nl.lumc.sasc.biopet.extensions.aligners.
{
Bwa
,
Star
,
Bowtie
,
Stampy
}
import
nl.lumc.sasc.biopet.extensions.picard.
{
MarkDuplicates
,
SortSam
,
MergeSamFiles
,
AddOrReplaceReadGroups
}
import
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import
nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Argument
,
ClassType
}
import
scala.math._
class
Mapping
(
val
root
:
Configurable
)
extends
QScript
with
BiopetQScript
{
qscript
=>
def
this
()
=
this
(
null
)
@Input
(
doc
=
"R1 fastq file"
,
shortName
=
"R1"
,
required
=
true
)
var
input_R1
:
File
=
_
@Input
(
doc
=
"R2 fastq file"
,
shortName
=
"R2"
,
required
=
false
)
var
input_R2
:
File
=
_
@Argument
(
doc
=
"Output name"
,
shortName
=
"outputName"
,
required
=
false
)
var
outputName
:
String
=
_
@Argument
(
doc
=
"Skip flexiprep"
,
shortName
=
"skipflexiprep"
,
required
=
false
)
var
skipFlexiprep
:
Boolean
=
false
@Argument
(
doc
=
"Skip mark duplicates"
,
shortName
=
"skipmarkduplicates"
,
required
=
false
)
var
skipMarkduplicates
:
Boolean
=
false
@Argument
(
doc
=
"Skip metrics"
,
shortName
=
"skipmetrics"
,
required
=
false
)
var
skipMetrics
:
Boolean
=
false
@Argument
(
doc
=
"Aligner"
,
shortName
=
"ALN"
,
required
=
false
)
var
aligner
:
String
=
config
(
"aligner"
,
default
=
"bwa"
)
@Argument
(
doc
=
"Reference"
,
shortName
=
"R"
,
required
=
false
)
var
reference
:
File
=
config
(
"reference"
)
@Argument
(
doc
=
"Chunking"
,
shortName
=
"chunking"
,
required
=
false
)
var
chunking
:
Boolean
=
config
(
"chunking"
,
false
)
@ClassType
(
classOf
[
Int
])
@Argument
(
doc
=
"Number of chunks, when not defined pipeline will automatic calculate number of chunks"
,
shortName
=
"numberChunks"
,
required
=
false
)
var
numberChunks
:
Option
[
Int
]
=
None
// Readgroup items
@Argument
(
doc
=
"Readgroup ID"
,
shortName
=
"RGID"
,
required
=
false
)
var
RGID
:
String
=
config
(
"RGID"
)
@Argument
(
doc
=
"Readgroup Library"
,
shortName
=
"RGLB"
,
required
=
false
)
var
RGLB
:
String
=
config
(
"RGLB"
)
@Argument
(
doc
=
"Readgroup Platform"
,
shortName
=
"RGPL"
,
required
=
false
)
var
RGPL
:
String
=
config
(
"RGPL"
,
default
=
"illumina"
)
@Argument
(
doc
=
"Readgroup platform unit"
,
shortName
=
"RGPU"
,
required
=
false
)
var
RGPU
:
String
=
config
(
"RGPU"
,
default
=
"na"
)
@Argument
(
doc
=
"Readgroup sample"
,
shortName
=
"RGSM"
,
required
=
false
)
var
RGSM
:
String
=
config
(
"RGSM"
)
@Argument
(
doc
=
"Readgroup sequencing center"
,
shortName
=
"RGCN"
,
required
=
false
)
var
RGCN
:
String
=
config
(
"RGCN"
)
@Argument
(
doc
=
"Readgroup description"
,
shortName
=
"RGDS"
,
required
=
false
)
var
RGDS
:
String
=
config
(
"RGDS"
)
@Argument
(
doc
=
"Readgroup sequencing date"
,
shortName
=
"RGDT"
,
required
=
false
)
var
RGDT
:
Date
=
_
@Argument
(
doc
=
"Readgroup predicted insert size"
,
shortName
=
"RGPI"
,
required
=
false
)
var
RGPI
:
Int
=
config
(
"RGPI"
)
var
paired
:
Boolean
=
false
val
flexiprep
=
new
Flexiprep
(
this
)
def
init
()
{
if
(
outputDir
==
null
)
throw
new
IllegalStateException
(
"Missing Output directory on mapping module"
)
else
if
(!
outputDir
.
endsWith
(
"/"
))
outputDir
+=
"/"
if
(
input_R1
==
null
)
throw
new
IllegalStateException
(
"Missing FastQ R1 on mapping module"
)
paired
=
(
input_R2
!=
null
)
if
(
RGLB
==
null
)
throw
new
IllegalStateException
(
"Missing Readgroup library on mapping module"
)
if
(
RGLB
==
null
)
throw
new
IllegalStateException
(
"Missing Readgroup sample on mapping module"
)
if
(
RGID
==
null
&&
RGSM
!=
null
&&
RGLB
!=
null
)
RGID
=
RGSM
+
"-"
+
RGLB
else
if
(
RGID
==
null
)
throw
new
IllegalStateException
(
"Missing Readgroup ID on mapping module"
)
if
(
outputName
==
null
)
outputName
=
RGID
if
(
chunking
)
{
if
(
numberChunks
.
isEmpty
)
{
if
(
config
.
contains
(
"numberchunks"
))
numberChunks
=
config
(
"numberchunks"
,
default
=
None
)
else
{
val
chunkSize
:
Int
=
config
(
"chunksize"
,
(
1
<<
30
))
val
filesize
=
if
(
input_R1
.
getName
.
endsWith
(
".gz"
)
||
input_R1
.
getName
.
endsWith
(
".gzip"
))
input_R1
.
length
*
3
else
input_R1
.
length
numberChunks
=
Option
(
ceil
(
filesize
.
toDouble
/
chunkSize
).
toInt
)
}
}
logger
.
debug
(
"Chunks: "
+
numberChunks
.
getOrElse
(
1
))
}
}
def
biopetScript
()
{
var
fastq_R1
:
File
=
input_R1
var
fastq_R2
:
File
=
if
(
paired
)
input_R2
else
""
if
(!
skipFlexiprep
)
{
flexiprep
.
outputDir
=
outputDir
+
"flexiprep/"
flexiprep
.
input_R1
=
fastq_R1
if
(
paired
)
flexiprep
.
input_R2
=
fastq_R2
flexiprep
.
sampleName
=
this
.
RGSM
flexiprep
.
libraryName
=
this
.
RGLB
flexiprep
.
init
flexiprep
.
runInitialJobs
}
var
bamFiles
:
List
[
File
]
=
Nil
var
fastq_R1_output
:
List
[
File
]
=
Nil
var
fastq_R2_output
:
List
[
File
]
=
Nil
def
removeGz
(
file
:
String
)
:
String
=
{
if
(
file
.
endsWith
(
".gz"
))
return
file
.
substring
(
0
,
file
.
lastIndexOf
(
".gz"
))
else
if
(
file
.
endsWith
(
".gzip"
))
return
file
.
substring
(
0
,
file
.
lastIndexOf
(
".gzip"
))
else
return
file
}
var
chunks
:
Map
[
String
,
(
String
,
String
)]
=
Map
()
if
(
chunking
)
for
(
t
<-
1
to
numberChunks
.
getOrElse
(
1
))
{
val
chunkDir
=
outputDir
+
"chunks/"
+
t
+
"/"
chunks
+=
(
chunkDir
->
(
removeGz
(
chunkDir
+
fastq_R1
.
getName
),
if
(
paired
)
removeGz
(
chunkDir
+
fastq_R2
.
getName
)
else
""
))
}
else
chunks
+=
(
outputDir
->
(
flexiprep
.
extractIfNeeded
(
fastq_R1
,
flexiprep
.
outputDir
),
flexiprep
.
extractIfNeeded
(
fastq_R2
,
flexiprep
.
outputDir
)))
if
(
chunking
)
{
val
fastSplitter_R1
=
new
FastqSplitter
(
this
)
fastSplitter_R1
.
input
=
fastq_R1
for
((
chunkDir
,
fastqfile
)
<-
chunks
)
fastSplitter_R1
.
output
:+=
fastqfile
.
_1
add
(
fastSplitter_R1
,
isIntermediate
=
true
)
if
(
paired
)
{
val
fastSplitter_R2
=
new
FastqSplitter
(
this
)
fastSplitter_R2
.
input
=
fastq_R2
for
((
chunkDir
,
fastqfile
)
<-
chunks
)
fastSplitter_R2
.
output
:+=
fastqfile
.
_2
add
(
fastSplitter_R2
,
isIntermediate
=
true
)
}
}
for
((
chunkDir
,
fastqfile
)
<-
chunks
)
{
var
R1
=
fastqfile
.
_1
var
R2
=
fastqfile
.
_2
var
deps
:
List
[
File
]
=
Nil
if
(!
skipFlexiprep
)
{
val
flexiout
=
flexiprep
.
runTrimClip
(
R1
,
R2
,
chunkDir
+
"flexiprep/"
,
chunkDir
)
logger
.
debug
(
chunkDir
+
" - "
+
flexiout
)
R1
=
flexiout
.
_1
if
(
paired
)
R2
=
flexiout
.
_2
deps
=
flexiout
.
_3
fastq_R1_output
:+=
R1
fastq_R2_output
:+=
R2
}
val
outputBam
=
new
File
(
chunkDir
+
outputName
+
".bam"
)
bamFiles
:+=
outputBam
aligner
match
{
case
"bwa"
=>
addBwa
(
R1
,
R2
,
outputBam
,
deps
)
case
"bowtie"
=>
addBowtie
(
R1
,
R2
,
outputBam
,
deps
)
case
"stampy"
=>
addStampy
(
R1
,
R2
,
outputBam
,
deps
)
case
"star"
=>
addStar
(
R1
,
R2
,
outputBam
,
deps
)
case
"star-2pass"
=>
addStar2pass
(
R1
,
R2
,
outputBam
,
deps
)
case
_
=>
throw
new
IllegalStateException
(
"Option Aligner: '"
+
aligner
+
"' is not valid"
)
}
if
(
config
(
"chunk_metrics"
,
default
=
false
))
addAll
(
BamMetrics
(
this
,
outputBam
,
chunkDir
+
"metrics/"
).
functions
)
}
if
(!
skipFlexiprep
)
{
flexiprep
.
runFinalize
(
fastq_R1_output
,
fastq_R2_output
)
addAll
(
flexiprep
.
functions
)
// Add function of flexiprep to curent function pool
}
var
bamFile
=
bamFiles
.
head
if
(!
skipMarkduplicates
)
{
bamFile
=
new
File
(
outputDir
+
outputName
+
".dedup.bam"
)
add
(
MarkDuplicates
(
this
,
bamFiles
,
bamFile
))
}
else
if
(
skipMarkduplicates
&&
chunking
)
{
val
mergeSamFile
=
MergeSamFiles
(
this
,
bamFiles
,
outputDir
)
add
(
mergeSamFile
)
bamFile
=
mergeSamFile
.
output
}
if
(!
skipMetrics
)
addAll
(
BamMetrics
(
this
,
bamFile
,
outputDir
+
"metrics/"
).
functions
)
outputFiles
+=
(
"finalBamFile"
->
bamFile
)
}
def
addBwa
(
R1
:
File
,
R2
:
File
,
output
:
File
,
deps
:
List
[
File
])
:
File
=
{
val
bwaCommand
=
new
Bwa
(
this
)
bwaCommand
.
R1
=
R1
if
(
paired
)
bwaCommand
.
R2
=
R2
bwaCommand
.
deps
=
deps
bwaCommand
.
R
=
getReadGroup
bwaCommand
.
output
=
this
.
swapExt
(
output
.
getParent
,
output
,
".bam"
,
".sam"
)
add
(
bwaCommand
,
isIntermediate
=
true
)
val
sortSam
=
SortSam
(
this
,
bwaCommand
.
output
,
output
)
if
(
chunking
||
!
skipMarkduplicates
)
sortSam
.
isIntermediate
=
true
add
(
sortSam
)
return
sortSam
.
output
}
def
addStampy
(
R1
:
File
,
R2
:
File
,
output
:
File
,
deps
:
List
[
File
])
:
File
=
{
var
RG
:
String
=
"ID:"
+
RGID
+
","
RG
+=
"SM:"
+
RGSM
+
","
RG
+=
"LB:"
+
RGLB
+
","
if
(
RGDS
!=
null
)
RG
+=
"DS"
+
RGDS
+
","
RG
+=
"PU:"
+
RGPU
+
","
if
(
RGPI
>
0
)
RG
+=
"PI:"
+
RGPI
+
","
if
(
RGCN
!=
null
)
RG
+=
"CN:"
+
RGCN
+
","
if
(
RGDT
!=
null
)
RG
+=
"DT:"
+
RGDT
+
","
RG
+=
"PL:"
+
RGPL
val
stampyCmd
=
new
Stampy
(
this
)
stampyCmd
.
R1
=
R1
if
(
paired
)
stampyCmd
.
R2
=
R2
stampyCmd
.
deps
=
deps
stampyCmd
.
readgroup
=
RG
stampyCmd
.
sanger
=
true
stampyCmd
.
output
=
this
.
swapExt
(
output
.
getParent
,
output
,
".bam"
,
".sam"
)
add
(
stampyCmd
,
isIntermediate
=
true
)
val
sortSam
=
SortSam
(
this
,
stampyCmd
.
output
,
output
)
if
(
chunking
||
!
skipMarkduplicates
)
sortSam
.
isIntermediate
=
true
add
(
sortSam
)
return
sortSam
.
output
}
def
addBowtie
(
R1
:
File
,
R2
:
File
,
output
:
File
,
deps
:
List
[
File
])
:
File
=
{
val
bowtie
=
new
Bowtie
(
this
)
bowtie
.
R1
=
R1
if
(
paired
)
bowtie
.
R2
=
R2
bowtie
.
deps
=
deps
bowtie
.
output
=
this
.
swapExt
(
output
.
getParent
,
output
,
".bam"
,
".sam"
)
add
(
bowtie
,
isIntermediate
=
true
)
return
addAddOrReplaceReadGroups
(
bowtie
.
output
,
output
)
}
def
addStar
(
R1
:
File
,
R2
:
File
,
output
:
File
,
deps
:
List
[
File
])
:
File
=
{
val
starCommand
=
Star
(
this
,
R1
,
if
(
paired
)
R2
else
null
,
outputDir
,
isIntermediate
=
true
,
deps
=
deps
)
add
(
starCommand
)
return
addAddOrReplaceReadGroups
(
starCommand
.
outputSam
,
output
)
}
def
addStar2pass
(
R1
:
File
,
R2
:
File
,
output
:
File
,
deps
:
List
[
File
])
:
File
=
{
val
starCommand
=
Star
.
_2pass
(
this
,
R1
,
if
(
paired
)
R2
else
null
,
outputDir
,
isIntermediate
=
true
,
deps
=
deps
)
addAll
(
starCommand
.
_2
)
return
addAddOrReplaceReadGroups
(
starCommand
.
_1
,
output
)
}
def
addAddOrReplaceReadGroups
(
input
:
File
,
output
:
File
)
:
File
=
{
val
addOrReplaceReadGroups
=
AddOrReplaceReadGroups
(
this
,
input
,
output
)
addOrReplaceReadGroups
.
createIndex
=
true
addOrReplaceReadGroups
.
RGID
=
RGID
addOrReplaceReadGroups
.
RGLB
=
RGLB
addOrReplaceReadGroups
.
RGPL
=
RGPL
addOrReplaceReadGroups
.
RGPU
=
RGPU
addOrReplaceReadGroups
.
RGSM
=
RGSM
if
(
RGCN
!=
null
)
addOrReplaceReadGroups
.
RGCN
=
RGCN
if
(
RGDS
!=
null
)
addOrReplaceReadGroups
.
RGDS
=
RGDS
if
(!
skipMarkduplicates
)
addOrReplaceReadGroups
.
isIntermediate
=
true
add
(
addOrReplaceReadGroups
)
return
addOrReplaceReadGroups
.
output
}
def
getReadGroup
()
:
String
=
{
var
RG
:
String
=
"@RG\\t"
+
"ID:"
+
RGID
+
"\\t"
RG
+=
"LB:"
+
RGLB
+
"\\t"
RG
+=
"PL:"
+
RGPL
+
"\\t"
RG
+=
"PU:"
+
RGPU
+
"\\t"
RG
+=
"SM:"
+
RGSM
+
"\\t"
if
(
RGCN
!=
null
)
RG
+=
"CN:"
+
RGCN
+
"\\t"
if
(
RGDS
!=
null
)
RG
+=
"DS"
+
RGDS
+
"\\t"
if
(
RGDT
!=
null
)
RG
+=
"DT"
+
RGDT
+
"\\t"
if
(
RGPI
>
0
)
RG
+=
"PI"
+
RGPI
+
"\\t"
return
RG
.
substring
(
0
,
RG
.
lastIndexOf
(
"\\t"
))
}
}
object
Mapping
extends
PipelineCommand
{
def
loadFromLibraryConfig
(
root
:
Configurable
,
runConfig
:
Map
[
String
,
Any
],
sampleConfig
:
Map
[
String
,
Any
],
runDir
:
String
,
startJobs
:
Boolean
=
true
)
:
Mapping
=
{
val
mapping
=
new
Mapping
(
root
)
logger
.
debug
(
"Mapping runconfig: "
+
runConfig
)
if
(
runConfig
.
contains
(
"R1"
))
mapping
.
input_R1
=
new
File
(
runConfig
(
"R1"
).
toString
)
if
(
runConfig
.
contains
(
"R2"
))
mapping
.
input_R2
=
new
File
(
runConfig
(
"R2"
).
toString
)
mapping
.
paired
=
(
mapping
.
input_R2
!=
null
)
mapping
.
RGLB
=
runConfig
(
"ID"
).
toString
mapping
.
RGSM
=
sampleConfig
(
"ID"
).
toString
if
(
runConfig
.
contains
(
"PL"
))
mapping
.
RGPL
=
runConfig
(
"PL"
).
toString
if
(
runConfig
.
contains
(
"PU"
))
mapping
.
RGPU
=
runConfig
(
"PU"
).
toString
if
(
runConfig
.
contains
(
"CN"
))
mapping
.
RGCN
=
runConfig
(
"CN"
).
toString
mapping
.
outputDir
=
runDir
if
(
startJobs
)
{
mapping
.
init
mapping
.
biopetScript
}
return
mapping
}
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment