Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
0bc09ac2
Commit
0bc09ac2
authored
Jun 29, 2016
by
Peter van 't Hof
Browse files
Merge branch '361-fix-wipereads-extension' into 'develop'
Fixing WipeReads Closes #361 See merge request !428
parents
736e0109
8b40a367
Changes
3
Hide whitespace changes
Inline
Side-by-side
biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/WipeReads.scala
View file @
0bc09ac2
...
...
@@ -18,9 +18,8 @@ import java.io.File
import
nl.lumc.sasc.biopet.core.ToolCommandFunction
import
nl.lumc.sasc.biopet.utils.config.Configurable
import
org.broadinstitute.gatk.utils.commandline.
{
Input
,
Output
}
import
org.broadinstitute.gatk.utils.commandline.
{
Argument
,
Input
,
Output
}
// TODO: finish implementation for usage in pipelines
/**
* WipeReads function class for usage in Biopet pipelines
*
...
...
@@ -36,9 +35,52 @@ class WipeReads(val root: Configurable) extends ToolCommandFunction {
@Input
(
doc
=
"Interval file"
,
shortName
=
"r"
,
required
=
true
)
var
intervalFile
:
File
=
null
@Argument
(
doc
=
"Minimum MAPQ of reads in target region to remove (default: 0)"
)
var
minMapQ
:
Option
[
Int
]
=
config
(
"min_mapq"
)
@Argument
(
doc
=
"Read group IDs to be removed (default: remove reads from all read groups)"
)
var
readgroup
:
Set
[
String
]
=
config
(
"read_group"
,
default
=
Nil
)
@Argument
(
doc
=
"Whether to remove multiple-mapped reads outside the target regions (default: yes)"
)
var
limitRemoval
:
Boolean
=
config
(
"limit_removal"
,
default
=
false
)
@Argument
(
doc
=
"Whether to index output BAM file or not"
)
var
noMakeIndex
:
Boolean
=
config
(
"no_make_index"
,
default
=
false
)
@Argument
(
doc
=
"GTF feature containing intervals (default: exon)"
)
var
featureType
:
Option
[
String
]
=
config
(
"feature_type"
)
@Argument
(
doc
=
"Expected maximum number of reads in target regions (default: 7e7)"
)
var
bloomSize
:
Option
[
Long
]
=
config
(
"bloom_size"
)
@Argument
(
doc
=
"False positive rate (default: 4e-7)"
)
var
falsePositive
:
Option
[
Long
]
=
config
(
"false_positive"
)
@Output
(
doc
=
"Output BAM"
,
shortName
=
"o"
,
required
=
true
)
var
outputBam
:
File
=
null
@Output
(
required
=
false
)
private
var
outputIndex
:
Option
[
File
]
=
None
@Output
(
doc
=
"BAM containing discarded reads"
,
shortName
=
"f"
,
required
=
false
)
var
discardedBam
:
File
=
null
var
discardedBam
:
Option
[
File
]
=
None
override
def
beforeGraph
()
{
super
.
beforeGraph
()
if
(!
noMakeIndex
)
outputIndex
=
Some
(
new
File
(
outputBam
.
getPath
.
stripSuffix
(
".bam"
)
+
".bai"
))
}
override
def
cmdLine
=
super
.
cmdLine
+
required
(
"-I"
,
inputBam
)
+
required
(
"-r"
,
intervalFile
)
+
required
(
"-o"
,
outputBam
)
+
optional
(
"-f"
,
discardedBam
)
+
optional
(
"-Q"
,
minMapQ
)
+
readgroup
.
foreach
(
rg
=>
required
(
"-G"
,
rg
))
+
conditional
(
limitRemoval
,
"--limit_removal"
)
+
conditional
(
noMakeIndex
,
"--no_make_index"
)
+
optional
(
"-t"
,
featureType
)
+
optional
(
"--bloom_size"
,
bloomSize
)
+
optional
(
"--false_positive"
,
falsePositive
)
}
gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala
View file @
0bc09ac2
...
...
@@ -119,6 +119,10 @@ class Gentrap(val root: Configurable) extends QScript
"mapping"
->
Map
(
"aligner"
->
"gsnap"
,
"skip_markduplicates"
->
true
),
"wipereads"
->
Map
(
"limit_removal"
->
true
,
"no_make_index"
->
false
)
)
...
...
@@ -209,7 +213,7 @@ class Gentrap(val root: Configurable) extends QScript
job
.
inputBam
=
bamFile
.
get
ribosomalRefFlat
().
foreach
(
job
.
intervalFile
=
_
)
job
.
outputBam
=
createFile
(
"cleaned.bam"
)
job
.
discardedBam
=
createFile
(
"rrna.bam"
)
job
.
discardedBam
=
Some
(
createFile
(
"rrna.bam"
)
)
add
(
job
)
Some
(
job
.
outputBam
)
}
else
bamFile
...
...
gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala
View file @
0bc09ac2
...
...
@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetPipe }
import
nl.lumc.sasc.biopet.extensions._
import
nl.lumc.sasc.biopet.extensions.gmap.Gsnap
import
nl.lumc.sasc.biopet.extensions.hisat.Hisat2
import
nl.lumc.sasc.biopet.extensions.tools.BaseCounter
import
nl.lumc.sasc.biopet.extensions.tools.
{
BaseCounter
,
WipeReads
}
import
nl.lumc.sasc.biopet.utils.ConfigUtils
import
nl.lumc.sasc.biopet.utils.config.Config
import
org.broadinstitute.gatk.queue.QSettings
...
...
@@ -71,10 +71,6 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
.
toMap
)
val
validExpressionMeasures
=
Set
(
"fragments_per_gene"
,
"fragments_per_exon"
,
"base_counts"
,
"cufflinks_strict"
,
"cufflinks_guided"
,
"cufflinks_blind"
)
@DataProvider
(
name
=
"expMeasuresstrandProtocol"
)
def
expMeasuresStrandProtocolProvider
=
{
...
...
@@ -89,27 +85,23 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
}
yield
makeSamplesConfig
(
sampleNum
,
libNum
,
libType
)
val
strandProtocols
=
Array
(
"non_specific"
,
"dutp"
)
// get all possible combinations of expression measures
val
expressionMeasures
=
validExpressionMeasures
//.subsets
//.map(_.toList)
.
toArray
for
{
sampleConfig
<-
sampleConfigs
.
toArray
//expressionMeasure <- expressionMeasures
strandProtocol
<-
strandProtocols
}
yield
Array
(
sampleConfig
,
List
(
expressionMeasure
),
strandProtocol
)
removeRiboReads
<-
Array
(
true
,
false
)
}
yield
Array
(
sampleConfig
,
List
(
expressionMeasure
),
strandProtocol
,
removeRiboReads
)
}
@Test
(
dataProvider
=
"expMeasuresstrandProtocol"
)
def
testGentrap
(
sampleConfig
:
SamplesConfig
,
expMeasures
:
List
[
String
],
strandProtocol
:
String
)
=
{
def
testGentrap
(
sampleConfig
:
SamplesConfig
,
expMeasures
:
List
[
String
],
strandProtocol
:
String
,
removeRiboReads
:
Boolean
)
=
{
val
settings
=
Map
(
"output_dir"
->
GentrapTest
.
outputDir
,
"gsnap"
->
Map
(
"db"
->
"test"
,
"dir"
->
"test"
),
"expression_measures"
->
expMeasures
,
"strand_protocol"
->
strandProtocol
"strand_protocol"
->
strandProtocol
,
"remove_ribosomal_reads"
->
removeRiboReads
)
++
aligner
.
map
(
"aligner"
->
_
)
val
config
=
ConfigUtils
.
mergeMaps
(
settings
++
sampleConfig
,
Map
(
GentrapTest
.
executables
.
toSeq
:
_
*
))
val
gentrap
:
Gentrap
=
initPipeline
(
config
)
...
...
@@ -146,6 +138,9 @@ abstract class GentrapTestAbstract(val expressionMeasure: String, val aligner: O
assert
(
gentrap
.
functions
.
exists
(
_
.
isInstanceOf
[
Ln
]))
}
gentrap
.
removeRibosomalReads
shouldBe
removeRiboReads
gentrap
.
functions
.
exists
(
_
.
isInstanceOf
[
WipeReads
])
shouldBe
removeRiboReads
val
classMap
=
Map
(
"gsnap"
->
classOf
[
Gsnap
],
"tophat"
->
classOf
[
Tophat
],
...
...
@@ -196,6 +191,7 @@ object GentrapTest {
"annotation_gtf"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"annotation_bed"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"annotation_refflat"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"ribosome_refflat"
->
(
outputDir
+
File
.
separator
+
"ref.fa"
),
"varscan_jar"
->
"test"
,
"rscript"
->
Map
(
"exe"
->
"test"
)
)
++
Seq
(
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment