Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Mirrors
biopet.biopet
Commits
d6acc80f
Commit
d6acc80f
authored
Jun 26, 2015
by
bow
Browse files
Initial Gentrap update to use new Metrics module
parent
eb9c02c0
Changes
7
Hide whitespace changes
Inline
Side-by-side
public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
View file @
d6acc80f
...
...
@@ -38,7 +38,9 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
/** Bed of amplicon that is used */
var
ampliconBedFile
:
Option
[
File
]
=
config
(
"amplicon_bed"
)
var
rnaMetrics
:
Boolean
=
config
(
"rna_metrcis"
,
default
=
false
)
/** Settings for CollectRnaSeqMetrics */
var
rnaMetricsSettings
:
Map
[
String
,
String
]
=
Map
()
var
transcriptRefFlatFile
:
Option
[
File
]
=
config
(
"transcript_refflat"
)
/** return location of summary file */
def
summaryFile
=
(
sampleId
,
libId
)
match
{
...
...
@@ -84,11 +86,14 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
add
(
wgsMetrics
)
addSummarizable
(
wgsMetrics
,
"wgs"
)
if
(
rnaMetrics
)
{
if
(
transcriptRefFlatFile
.
isDefined
)
{
val
rnaMetrics
=
new
CollectRnaSeqMetrics
(
this
)
rnaMetrics
.
input
=
inputBam
rnaMetrics
.
output
=
swapExt
(
outputDir
,
inputBam
,
".bam"
,
".rna.metrics"
)
rnaMetrics
.
chartOutput
=
Some
(
swapExt
(
outputDir
,
inputBam
,
".bam"
,
".rna.metrics.pdf"
))
rnaMetrics
.
refFlat
=
transcriptRefFlatFile
.
get
rnaMetrics
.
ribosomalIntervals
=
rnaMetricsSettings
.
get
(
"ribosomal_intervals"
).
collect
{
case
n
=>
new
File
(
n
)
}
rnaMetrics
.
strandSpecificity
=
rnaMetricsSettings
.
get
(
"strand_specificity"
)
add
(
rnaMetrics
)
addSummarizable
(
rnaMetrics
,
"rna"
)
}
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala
View file @
d6acc80f
...
...
@@ -41,7 +41,7 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
}
case
_
if
p
==
Programs
.
CollectInsertSizeMetrics
.
toString
=>
{
outputFiles
:+=
new
File
(
outputName
+
".insert_size_metrics"
)
outputFiles
:+=
new
File
(
outputName
+
".insert_size_
H
istogram.pdf"
)
outputFiles
:+=
new
File
(
outputName
+
".insert_size_
h
istogram.pdf"
)
}
case
_
if
p
==
Programs
.
QualityScoreDistribution
.
toString
=>
{
outputFiles
:+=
new
File
(
outputName
+
".quality_distribution_metrics"
)
...
...
@@ -85,17 +85,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
case
_
=>
None
}
val
sum
=
new
Summarizable
{
override
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
override
def
summaryStats
=
stats
override
def
summaryFiles
:
Map
[
String
,
File
]
=
Map
()
}
qscript
.
addSummarizable
(
sum
,
p
)
})
}
def
summaryFiles
=
Map
()
def
summaryStats
=
Map
()
def
summaryFiles
=
{
program
.
map
{
case
p
if
p
==
Programs
.
CollectInsertSizeMetrics
.
toString
=>
Map
(
"insert_size_histogram"
->
new
File
(
outputName
+
".insert_size_histogram.pdf"
),
"insert_size_metrics"
->
new
File
(
outputName
+
".insert_size_metrics"
))
case
otherwise
=>
Map
()
}.
foldLeft
(
Map
.
empty
[
String
,
File
])
{
case
(
acc
,
m
)
=>
(
acc
++
m
)
}
}
}
object
CollectMultipleMetrics
{
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala
View file @
d6acc80f
...
...
@@ -32,7 +32,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
var
input
:
File
=
null
@Input
(
doc
=
"Gene annotations in refFlat form"
,
required
=
true
)
var
refFlat
:
File
=
config
(
"refFlat"
)
var
refFlat
:
File
=
null
@Input
(
doc
=
"Location of rRNA sequences in interval list format"
,
required
=
false
)
var
ribosomalIntervals
:
Option
[
File
]
=
config
(
"ribosomal_intervals"
)
...
...
@@ -68,6 +68,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
var
stopAfter
:
Option
[
Long
]
=
config
(
"stop_after"
)
override
def
beforeGraph
:
Unit
=
{
require
(
refFlat
!=
null
,
"RefFlat file must be supplied."
)
val
validFlags
=
StrandSpecificity
.
values
.
map
(
_
.
toString
).
toSet
strandSpecificity
match
{
case
Some
(
s
)
=>
require
(
validFlags
.
contains
(
s
),
...
...
public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeTables.scala
View file @
d6acc80f
...
...
@@ -71,7 +71,7 @@ class MergeTables(val root: Configurable) extends ToolCommandFuntion {
required
(
"-a"
,
valueColumnIndex
)
+
optional
(
"-n"
,
idColumnName
)
+
optional
(
"-e"
,
fileExtension
)
+
optional
(
"-
h
"
,
numHeaderLines
)
+
optional
(
"-
m
"
,
numHeaderLines
)
+
optional
(
"-f"
,
fallbackString
)
+
optional
(
"-d"
,
delimiter
)
+
required
(
"-o"
,
output
)
+
...
...
@@ -206,7 +206,7 @@ object MergeTables extends ToolCommand {
c
.
copy
(
fileExtension
=
x
)
}
text
"Common extension of all input tables to strip (default: empty string)"
opt
[
Int
](
'
h
'
,
"num_header_lines"
)
optional
()
action
{
(
x
,
c
)
=>
opt
[
Int
](
'
m
'
,
"num_header_lines"
)
optional
()
action
{
(
x
,
c
)
=>
c
.
copy
(
numHeaderLines
=
x
)
}
text
"The number of header lines present in all input files (default: 0; no header)"
...
...
public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/scripts/pdf_report.py
View file @
d6acc80f
...
...
@@ -53,7 +53,7 @@ class FastQCModule(object):
def
__repr__
(
self
):
return
'%s(%s)'
%
(
self
.
__class__
.
__name__
,
'[%r, ...]'
%
self
.
raw_lines
[
0
])
'[%r, ...]'
%
self
.
raw_lines
[
0
])
def
__str__
(
self
):
return
''
.
join
(
self
.
raw_lines
)
...
...
@@ -88,7 +88,7 @@ class FastQCModule(object):
self
.
_name
=
name
status
=
tokens
[
-
1
]
assert
status
in
(
'pass'
,
'fail'
,
'warn'
),
"Unknown module status: %r"
\
%
status
%
status
self
.
_status
=
status
# and column names from second line
columns
=
self
.
raw_lines
[
1
][
1
:].
strip
().
split
(
'
\t
'
)
...
...
@@ -123,7 +123,7 @@ class FastQC(object):
'>>Sequence Duplication Levels'
:
'sequence_duplication_levels'
,
'>>Overrepresented sequences'
:
'overrepresented_sequences'
,
'>>Kmer content'
:
'kmer_content'
,
}
}
def
__init__
(
self
,
fname
):
"""
...
...
@@ -299,12 +299,12 @@ class LongTable(object):
"
\\
hline
\\
hline"
,
"
\\
endhead"
,
"
\\
hline
\\
multicolumn{%i}{c}{
\\
textit{Continued on next page}}
\\\\
"
%
\
colnum
,
colnum
,
"
\\
hline"
,
"
\\
endfoot"
,
"
\\
hline"
,
"
\\
endlastfoot"
,
]
]
def
__str__
(
self
):
return
"
\n
"
.
join
(
self
.
lines
)
...
...
@@ -314,7 +314,7 @@ class LongTable(object):
def
end
(
self
):
self
.
lines
.
extend
([
"
\\
end{longtable}"
,
"
\\
end{center}"
,
"
\\
addtocounter{table}{-1}"
])
"
\\
addtocounter{table}{-1}"
])
# filter functions for the jinja environment
...
...
@@ -348,7 +348,7 @@ def float2nice_pct(num, default="None"):
# and some handy functions
def
natural_sort
(
inlist
):
key
=
lambda
x
:
[
int
(
a
)
if
a
.
isdigit
()
else
a
.
lower
()
for
a
in
re
.
split
(
"([0-9]+)"
,
x
)]
re
.
split
(
"([0-9]+)"
,
x
)]
inlist
.
sort
(
key
=
key
)
return
inlist
...
...
@@ -383,7 +383,7 @@ def write_template(run, template_file, logo_file):
run
.
logo
=
logo_file
render_vars
=
{
"run"
:
run
,
}
}
rendered
=
jinja_template
.
render
(
**
render_vars
)
print
(
rendered
,
file
=
sys
.
stdout
)
...
...
@@ -417,36 +417,40 @@ class GentrapLib(object):
self
.
fastqc_r2_qc_files
=
self
.
flexiprep
[
"files"
][
"fastqc_R2_qc"
]
self
.
fastqc_r2_qc
=
FastQC
(
self
.
fastqc_r2_qc_files
[
"fastqc_data"
][
"path"
])
# mapping metrics settings
self
.
aln_metrics
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"stats"
,
{}).
get
(
"
alignment_m
etrics"
,
{})
self
.
aln_metrics
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"stats"
,
{}).
get
(
"
CollectAlignmentSummaryM
etrics"
,
{})
# insert size metrics files
self
.
inserts_metrics_files
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"files"
,
{}).
get
(
"insert_size_metrics"
,
{})
self
.
inserts_metrics_files
=
\
summary
.
get
(
"bammetrics"
,
{}).
get
(
"files"
,
{}).
get
(
"multi_metrics"
,
{})
# rna metrics files and stats
self
.
rna_metrics_files
=
summary
.
get
(
"gentrap"
,
{}).
get
(
"files"
,
{}).
get
(
"rna_metrics"
,
{})
_rmetrics
=
summary
.
get
(
"gentrap"
,
{}).
get
(
"stats"
,
{}).
get
(
"rna_metrics"
,
{})
self
.
rna_metrics_files
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"files"
,
{}).
get
(
"rna"
,
{})
_rmetrics
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"stats"
,
{}).
get
(
"rna"
,
{})
if
_rmetrics
:
if
"metrics"
in
_rmetrics
:
_rmetrics
=
_rmetrics
[
"metrics"
]
if
_rmetrics
:
self
.
rna_metrics
=
{
k
:
v
for
k
,
v
in
_rmetrics
.
items
()
}
pf_bases
=
float
(
_rmetrics
[
"
pf_bases
"
])
exonic_bases
=
int
(
_rmetrics
.
get
(
"
coding_bases
"
,
0
))
+
int
(
_rmetrics
.
get
(
"
utr_bases
"
,
0
))
pf_bases
=
float
(
_rmetrics
[
"
PF_BASES
"
])
exonic_bases
=
int
(
_rmetrics
.
get
(
"
CODING_BASES
"
,
0
))
+
int
(
_rmetrics
.
get
(
"
UTR_BASES
"
,
0
))
# picard uses pct_ but it's actually ratio ~ we follow their convention
pct_exonic_bases_all
=
exonic_bases
/
float
(
_rmetrics
[
"
pf_bases
"
])
pct_exonic_bases
=
exonic_bases
/
float
(
_rmetrics
.
get
(
"
pf_aligned_bases
"
,
0
))
pct_exonic_bases_all
=
exonic_bases
/
float
(
_rmetrics
[
"
PF_BASES
"
])
pct_exonic_bases
=
exonic_bases
/
float
(
_rmetrics
.
get
(
"
PF_ALIGNED_BASES
"
,
0
))
self
.
rna_metrics
.
update
({
"exonic_bases
"
:
exonic_bases
,
"pct_exonic_bases_all
"
:
pct_exonic_bases_all
,
"pct_exonic_bases
"
:
pct_exonic_bases
,
"pct_aligned_bases
"
:
1.0
,
"pct_aligned_bases_all
"
:
float
(
_rmetrics
.
get
(
"
pf_aligned_bases
"
,
0.0
))
/
pf_bases
,
"pct_coding_bases_all
"
:
float
(
_rmetrics
.
get
(
"
coding_bases
"
,
0.0
))
/
pf_bases
,
"pct_utr_bases_all
"
:
float
(
_rmetrics
.
get
(
"
utr_bases
"
,
0.0
))
/
pf_bases
,
"pct_intronic_bases_all
"
:
float
(
_rmetrics
.
get
(
"
intronic_bases
"
,
0.0
))
/
pf_bases
,
"pct_intergenic_bases_all
"
:
float
(
_rmetrics
.
get
(
"
intergenic_bases
"
,
0.0
))
/
pf_bases
,
})
if
_rmetrics
.
get
(
"
ribosomal_bases
"
,
""
)
!=
""
:
self
.
rna_metrics
[
"
pct_ribosomal_bases_all
"
]
=
float
(
_rmetrics
.
get
(
"
pf_ribosomal_bases
"
,
0.0
))
/
pf_bases
"EXONIC_BASES
"
:
exonic_bases
,
"PCT_EXONIC_BASES_ALL
"
:
pct_exonic_bases_all
,
"PCT_EXONIC_BASES
"
:
pct_exonic_bases
,
"PCT_ALIGNED_BASES
"
:
1.0
,
"PCT_ALIGNED_BASES_ALL
"
:
float
(
_rmetrics
.
get
(
"
PF_ALIGNED_BASES
"
,
0.0
))
/
pf_bases
,
"PCT_CODING_BASES_ALL
"
:
float
(
_rmetrics
.
get
(
"
CODING_BASES
"
,
0.0
))
/
pf_bases
,
"PCT_UTR_BASES_ALL
"
:
float
(
_rmetrics
.
get
(
"
UTR_BASES
"
,
0.0
))
/
pf_bases
,
"PCT_INTRONIC_BASES_ALL
"
:
float
(
_rmetrics
.
get
(
"
INTRONIC_BASES
"
,
0.0
))
/
pf_bases
,
"PCT_INTERGENIC_BASES_ALL
"
:
float
(
_rmetrics
.
get
(
"
INTERGENIC_BASES
"
,
0.0
))
/
pf_bases
,
})
if
_rmetrics
.
get
(
"
RIBOSOMAL_BASES
"
,
""
)
!=
""
:
self
.
rna_metrics
[
"
PCT_RIBOSOMAL_BASES_ALL
"
]
=
float
(
_rmetrics
.
get
(
"
PF_RIBOSOMAL_BASES
"
,
0.0
))
/
pf_bases
def
__repr__
(
self
):
return
"{0}(sample=
\"
{1}
\"
, lib=
\"
{2}
\"
)"
.
format
(
self
.
__class__
.
__name__
,
self
.
sample
.
name
,
self
.
name
)
self
.
__class__
.
__name__
,
self
.
sample
.
name
,
self
.
name
)
class
GentrapSample
(
object
):
...
...
@@ -458,40 +462,41 @@ class GentrapSample(object):
self
.
_raw
=
summary
self
.
is_paired_end
=
summary
.
get
(
"gentrap"
,
{}).
get
(
"stats"
,
{}).
get
(
"pipeline"
,
{})[
"all_paired"
]
# mapping metrics settings
self
.
aln_metrics
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"stats"
,
{}).
get
(
"
alignment_m
etrics"
,
{})
self
.
aln_metrics
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"stats"
,
{}).
get
(
"
CollectAlignmentSummaryM
etrics"
,
{})
# insert size metrics files
self
.
inserts_metrics_files
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"files"
,
{}).
get
(
"insert_size_metrics"
,
{})
self
.
inserts_metrics_files
=
\
summary
.
get
(
"bammetrics"
,
{}).
get
(
"files"
,
{}).
get
(
"CollectInsertSizeMetrics"
,
{}).
get
(
"metrics"
,
{})
# rna metrics files and stats
self
.
rna_metrics_files
=
summary
.
get
(
"gentrap"
,
{}).
get
(
"files"
,
{}).
get
(
"rna_metrics"
,
{})
_rmetrics
=
summary
.
get
(
"gentrap"
,
{}).
get
(
"stats"
,
{}).
get
(
"rna_metrics"
,
{})
self
.
rna_metrics_files
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"files"
,
{}).
get
(
"rna"
,
{})
_rmetrics
=
summary
.
get
(
"bammetrics"
,
{}).
get
(
"stats"
,
{}).
get
(
"rna"
,
{})
if
_rmetrics
:
if
"metrics"
in
_rmetrics
:
_rmetrics
=
_rmetrics
[
"metrics"
]
if
_rmetrics
:
self
.
rna_metrics
=
{
k
:
v
for
k
,
v
in
_rmetrics
.
items
()
}
pf_bases
=
float
(
_rmetrics
[
"
pf_bases
"
])
exonic_bases
=
int
(
_rmetrics
.
get
(
"
coding_bases
"
,
0
))
+
int
(
_rmetrics
.
get
(
"
utr_bases
"
,
0
))
pf_bases
=
float
(
_rmetrics
[
"
PF_BASES
"
])
exonic_bases
=
int
(
_rmetrics
.
get
(
"
CODING_BASES
"
,
0
))
+
int
(
_rmetrics
.
get
(
"
UTR_BASES
"
,
0
))
# picard uses pct_ but it's actually ratio ~ we follow their convention
pct_exonic_bases_all
=
exonic_bases
/
float
(
_rmetrics
[
"
pf_bases
"
])
pct_exonic_bases
=
exonic_bases
/
float
(
_rmetrics
.
get
(
"
pf_aligned_bases
"
,
0
))
pct_exonic_bases_all
=
exonic_bases
/
float
(
_rmetrics
[
"
PF_BASES
"
])
pct_exonic_bases
=
exonic_bases
/
float
(
_rmetrics
.
get
(
"
PF_ALIGNED_BASES
"
,
0
))
self
.
rna_metrics
.
update
({
"exonic_bases"
:
exonic_bases
,
"pct_exonic_bases_all"
:
pct_exonic_bases_all
,
"pct_exonic_bases"
:
pct_exonic_bases
,
"pct_aligned_bases"
:
1.0
,
"pct_aligned_bases_all"
:
float
(
_rmetrics
.
get
(
"pf_aligned_bases"
,
0.0
))
/
pf_bases
,
"pct_coding_bases_all"
:
float
(
_rmetrics
.
get
(
"coding_bases"
,
0.0
))
/
pf_bases
,
"pct_utr_bases_all"
:
float
(
_rmetrics
.
get
(
"utr_bases"
,
0.0
))
/
pf_bases
,
"pct_intronic_bases_all"
:
float
(
_rmetrics
.
get
(
"intronic_bases"
,
0.0
))
/
pf_bases
,
"pct_intergenic_bases_all"
:
float
(
_rmetrics
.
get
(
"intergenic_bases"
,
0.0
))
/
pf_bases
,
})
if
self
.
run
.
settings
[
"strand_protocol"
]
!=
"non_specific"
:
self
.
rna_metrics
.
update
({
"EXONIC_BASES"
:
exonic_bases
,
"PCT_EXONIC_BASES_ALL"
:
pct_exonic_bases_all
,
"PCT_EXONIC_BASES"
:
pct_exonic_bases
,
"PCT_ALIGNED_BASES"
:
1.0
,
"PCT_ALIGNED_BASES_ALL"
:
float
(
_rmetrics
.
get
(
"PF_ALIGNED_BASES"
,
0.0
))
/
pf_bases
,
"PCT_CODING_BASES_ALL"
:
float
(
_rmetrics
.
get
(
"CODING_BASES"
,
0.0
))
/
pf_bases
,
"PCT_UTR_BASES_ALL"
:
float
(
_rmetrics
.
get
(
"UTR_BASES"
,
0.0
))
/
pf_bases
,
"PCT_INTRONIC_BASES_ALL"
:
float
(
_rmetrics
.
get
(
"INTRONIC_BASES"
,
0.0
))
/
pf_bases
,
"PCT_INTERGENIC_BASES_ALL"
:
float
(
_rmetrics
.
get
(
"INTERGENIC_BASES"
,
0.0
))
/
pf_bases
,
})
if
_rmetrics
.
get
(
"
ribosomal_bases
"
,
""
)
!=
""
:
self
.
rna_metrics
[
"
pct_ribosomal_bases_all
"
]
=
float
(
_rmetrics
.
get
(
"
pf_ribosomal_bases
"
,
0.0
))
/
pf_bases
if
_rmetrics
.
get
(
"
RIBOSOMAL_BASES
"
,
""
)
!=
""
:
self
.
rna_metrics
[
"
PCT_RIBOSOMAL_BASES_ALL
"
]
=
float
(
_rmetrics
.
get
(
"
PF_RIBOSOMAL_BASES
"
,
0.0
))
/
pf_bases
self
.
lib_names
=
sorted
(
summary
[
"libraries"
].
keys
())
self
.
libs
=
\
{
l
:
GentrapLib
(
self
.
run
,
self
,
l
,
summary
[
"libraries"
][
l
])
\
for
l
in
self
.
lib_names
}
for
l
in
self
.
lib_names
}
def
__repr__
(
self
):
return
"{0}(
\"
{1}
\"
)"
.
format
(
self
.
__class__
.
__name__
,
self
.
name
)
...
...
@@ -521,7 +526,7 @@ class GentrapRun(object):
(
"tophat"
,
"alignment"
),
(
"star"
,
"alignment"
),
(
"htseqcount"
,
"fragment counting"
),
]
]
self
.
executables
=
{}
for
k
,
desc
in
executables
:
in_summary
=
self
.
all_executables
.
get
(
k
)
...
...
@@ -543,7 +548,7 @@ class GentrapRun(object):
self
.
sample_names
=
sorted
(
summary
[
"samples"
].
keys
())
self
.
samples
=
\
{
s
:
GentrapSample
(
self
,
s
,
summary
[
"samples"
][
s
])
\
for
s
in
self
.
sample_names
}
for
s
in
self
.
sample_names
}
self
.
libs
=
[]
for
sample
in
self
.
samples
.
values
():
self
.
libs
.
extend
(
sample
.
libs
.
values
())
...
...
@@ -556,19 +561,20 @@ class GentrapRun(object):
def
__repr__
(
self
):
return
"{0}(
\"
{1}
\"
)"
.
format
(
self
.
__class__
.
__name__
,
self
.
summary_file
)
self
.
summary_file
)
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"summary_file"
,
type
=
str
,
help
=
"Path to Gentrap summary file"
)
help
=
"Path to Gentrap summary file"
)
parser
.
add_argument
(
"template_file"
,
type
=
str
,
help
=
"Path to main template file"
)
help
=
"Path to main template file"
)
parser
.
add_argument
(
"logo_file"
,
type
=
str
,
help
=
"Path to main logo file"
)
help
=
"Path to main logo file"
)
args
=
parser
.
parse_args
()
run
=
GentrapRun
(
args
.
summary_file
)
write_template
(
run
,
args
.
template_file
,
args
.
logo_file
)
public/gentrap/src/main/resources/nl/lumc/sasc/biopet/pipelines/gentrap/templates/pdf/lib_mapping.tex
View file @
d6acc80f
...
...
@@ -47,11 +47,11 @@
% inferred insert size distribution
\subsubsection
{
Insert size distribution
}
\IfFileExists
{
((( lib.inserts
_
metrics
_
files.
output
_
histogram.path )))
}
\IfFileExists
{
((( lib.inserts
_
metrics
_
files.
insert
_
size
_
histogram.path )))
}
{
\begin{figure}
[h!]
\centering
\includegraphics
[width=0.7\textwidth]
{
((( lib.inserts
_
metrics
_
files.
output
_
histogram.path )))
}
\includegraphics
[width=0.7\textwidth]
{
((( lib.inserts
_
metrics
_
files.
insert
_
size
_
histogram.path )))
}
\caption
{
Distribution of insert size length of paired-end reads mapped to opposite strands.
}
\end{figure}
}
...
...
@@ -108,14 +108,5 @@
Ribosomal bases
&
((( lib.rna
_
metrics.ribosomal
_
bases|nice
_
int )))
&
((( lib.rna
_
metrics.pct
_
ribosomal
_
bases
_
all|float2nice
_
pct )))
\%
&
((( lib.rna
_
metrics.pct
_
ribosomal
_
bases|float2nice
_
pct )))
\%
\\
((* endif *))
\hline
Median 5' bias
&
((( lib.rna
_
metrics.median
_
5prime
_
bias )))
&
-
&
-
\\
Median 3' bias
&
((( lib.rna
_
metrics.median
_
3prime
_
bias )))
&
-
&
-
\\
Median 5' to 3' bias
&
((( lib.rna
_
metrics.median
_
5prime
_
to
_
3prime
_
bias )))
&
-
&
-
\\
\hline
((* if lib.run.settings.strand
_
protocol != "non
_
specific" *))
Correct strand reads
&
((( lib.rna
_
metrics.correct
_
strand
_
reads|nice
_
int )))
&
-
&
-
\\
Incorrect strand reads
&
((( lib.rna
_
metrics.incorrect
_
strand
_
reads|nice
_
int )))
&
-
&
-
\\
((* endif *))
\hline
\end{tabular}
\end{center}
public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala
View file @
d6acc80f
...
...
@@ -16,10 +16,8 @@
package
nl.lumc.sasc.biopet.pipelines.gentrap
import
java.io.File
import
scala.collection.JavaConverters._
import
scala.language.reflectiveCalls
import
htsjdk.samtools.reference._
import
org.broadinstitute.gatk.queue.QScript
import
org.broadinstitute.gatk.queue.function.QFunction
import
picard.analysis.directed.RnaSeqMetricsCollector.StrandSpecificity
...
...
@@ -89,6 +87,17 @@ class Gentrap(val root: Configurable) extends QScript
/** Whether to do simple variant calling on RNA or not */
var
callVariants
:
Boolean
=
config
(
"call_variants"
,
default
=
false
)
/** Settings for all Picard CollectRnaSeqMetrics runs */
private
def
collectRnaSeqMetricsSettings
:
Map
[
String
,
String
]
=
Map
(
"strand_specificity"
->
(
strandProtocol
match
{
case
NonSpecific
=>
StrandSpecificity
.
NONE
.
toString
case
Dutp
=>
StrandSpecificity
.
SECOND_READ_TRANSCRIPTION_STRAND
.
toString
case
otherwise
=>
throw
new
IllegalStateException
(
otherwise
.
toString
)
}))
++
(
ribosomalRefFlat
match
{
case
Some
(
rbs
)
=>
Map
(
"ribosomal_intervals"
->
rbs
.
toString
)
case
None
=>
Map
()
})
/** Default pipeline config */
override
def
defaults
=
ConfigUtils
.
mergeMaps
(
Map
(
...
...
@@ -103,7 +112,10 @@ class Gentrap(val root: Configurable) extends QScript
"programrecordid"
->
"null"
),
// disable markduplicates since it may not play well with all aligners (this can still be overriden via config)
"mapping"
->
Map
(
"skip_markduplicates"
->
true
)
"mapping"
->
Map
(
"skip_markduplicates"
->
true
,
"skip_metrics"
->
true
)
),
super
.
defaults
)
/** Adds output merge jobs for the given expression mode */
...
...
@@ -313,24 +325,6 @@ class Gentrap(val root: Configurable) extends QScript
job
}
/** General function to create CollectRnaSeqMetrics job, for per-sample and per-library runs */
protected
def
makeCollectRnaSeqMetricsJob
(
alnFile
:
File
,
outMetrics
:
File
,
outChart
:
Option
[
File
]
=
None
)
:
CollectRnaSeqMetrics
=
{
val
job
=
new
CollectRnaSeqMetrics
(
qscript
)
job
.
input
=
alnFile
job
.
output
=
outMetrics
job
.
refFlat
=
annotationRefFlat
job
.
chartOutput
=
outChart
job
.
assumeSorted
=
true
job
.
strandSpecificity
=
strandProtocol
match
{
case
NonSpecific
=>
Option
(
StrandSpecificity
.
NONE
.
toString
)
case
Dutp
=>
Option
(
StrandSpecificity
.
SECOND_READ_TRANSCRIPTION_STRAND
.
toString
)
case
_
=>
throw
new
IllegalStateException
}
job
.
ribosomalIntervals
=
ribosomalRefFlat
job
}
/** Steps to run before biopetScript */
def
init
()
:
Unit
=
{
// TODO: validate that exons are flattened or not (depending on another option flag?)
...
...
@@ -695,15 +689,11 @@ class Gentrap(val root: Configurable) extends QScript
mod
.
inputBam
=
alnFile
mod
.
outputDir
=
new
File
(
sampleDir
,
"metrics"
)
mod
.
sampleId
=
Option
(
sampleId
)
mod
.
transcriptRefFlatFile
=
Option
(
annotationRefFlat
)
mod
.
rnaMetricsSettings
=
collectRnaSeqMetricsSettings
mod
}
/** Picard CollectRnaSeqMetrics job, only when library > 1 */
private
lazy
val
collectRnaSeqMetricsJob
:
Option
[
CollectRnaSeqMetrics
]
=
(
libraries
.
size
>
1
)
.
option
{
makeCollectRnaSeqMetricsJob
(
alnFileDirty
,
createFile
(
".rna_metrics"
),
Option
(
createFile
(
".coverage_bias.pdf"
)))
}
/** Job for removing ribosomal reads */
private
def
wipeJob
:
Option
[
WipeReads
]
=
removeRibosomalReads
.
option
{
...
...
@@ -725,7 +715,7 @@ class Gentrap(val root: Configurable) extends QScript
/** Ln or MergeSamFile job, depending on how many inputs are supplied */
private
def
makeCombineJob
(
inFiles
:
List
[
File
],
outFile
:
File
,
mergeSortOrder
:
String
=
"coordinate"
)
:
CombineFileJobSet
=
{
require
(
inFiles
.
nonEmpty
,
"At least one input files for combine job"
)
require
(
inFiles
.
nonEmpty
,
"At least one input files
required
for combine job"
)
if
(
inFiles
.
size
==
1
)
{
val
jobBam
=
new
Ln
(
qscript
)
...
...
@@ -765,12 +755,6 @@ class Gentrap(val root: Configurable) extends QScript
addPerLibJobs
()
// merge or symlink per-library alignments
sampleAlnJobSet
.
addAll
()
// general RNA-seq metrics, if there are > 1 library
collectRnaSeqMetricsJob
match
{
case
Some
(
j
)
=>
add
(
j
);
addSummarizable
(
j
,
"rna_metrics"
)
case
None
=>
;
}
bamMetricsModule
match
{
case
Some
(
m
)
=>
m
.
init
()
...
...
@@ -827,10 +811,6 @@ class Gentrap(val root: Configurable) extends QScript
/** Alignment results of this library ~ can only be accessed after addJobs is run! */
def
alnFile
:
File
=
mappingJob
.
outputFiles
(
"finalBamFile"
)
/** Library-level RNA-seq metrics job, only when we have more than 1 library in the sample */
def
collectRnaSeqMetricsJob
:
CollectRnaSeqMetrics
=
makeCollectRnaSeqMetricsJob
(
alnFile
,
createFile
(
".rna_metrics"
),
Option
(
createFile
(
".coverage_bias.pdf"
)))
/** Wiggle track job */
private
lazy
val
bam2wigModule
:
Bam2Wig
=
Bam2Wig
(
qscript
,
alnFile
)
...
...
@@ -847,16 +827,29 @@ class Gentrap(val root: Configurable) extends QScript
job
}
/** Library metrics job, since we don't have access to the underlying metrics */
private
lazy
val
bamMetricsJob
:
BamMetrics
=
{
val
mod
=
new
BamMetrics
(
qscript
)
mod
.
inputBam
=
alnFile
mod
.
outputDir
=
new
File
(
libDir
,
"metrics"
)
mod
.
sampleId
=
Option
(
sampleId
)
mod
.
libId
=
Option
(
libId
)
mod
.
rnaMetricsSettings
=
collectRnaSeqMetricsSettings
mod
.
transcriptRefFlatFile
=
Option
(
annotationRefFlat
)
mod
}
/** Adds all jobs for the library */
def
addJobs
()
:
Unit
=
{
// create per-library alignment file
addAll
(
mappingJob
.
functions
)
// add bigwig track
addAll
(
bam2wigModule
.
functions
)
// create RNA metrics job
add
(
collectRnaSeqMetricsJob
)
addSummarizable
(
collectRnaSeqMetricsJob
,
"rna_metrics"
)
qscript
.
addSummaryQScript
(
mappingJob
)
bamMetricsJob
.
init
()
bamMetricsJob
.
biopetScript
()
addAll
(
bamMetricsJob
.
functions
)
qscript
.
addSummaryQScript
(
bamMetricsJob
)
}
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment