Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
biopet.biopet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
biopet.biopet
Commits
2b833eaf
Commit
2b833eaf
authored
Jul 26, 2017
by
pjvan_thof
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Adding corrections for cores in the job reports
parent
df582f1a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
66 additions
and
19 deletions
+66
-19
biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R
...urces/org/broadinstitute/gatk/queue/util/queueJobReport.R
+66
-19
No files found.
biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R
View file @
2b833eaf
...
...
@@ -31,7 +31,7 @@ ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
# Helper function to aggregate all of the jobs in the report across all tables
#
allJobsFromReport
<-
function
(
report
)
{
names
<-
c
(
"jobName"
,
"startTime"
,
"analysisName"
,
"doneTime"
,
"exechosts"
,
"runtime"
)
names
<-
c
(
"jobName"
,
"startTime"
,
"analysisName"
,
"doneTime"
,
"exechosts"
,
"runtime"
,
"cores"
)
sub
<-
lapply
(
report
,
function
(
table
)
table
[,
names
])
do.call
(
"rbind"
,
sub
)
}
...
...
@@ -102,6 +102,43 @@ plotProgressByTime <- function(gatkReport) {
print
(
p
)
}
plotCoresByTime
<-
function
(
gatkReport
)
{
allJobs
=
allJobsFromReport
(
gatkReport
)
nJobs
=
sum
(
allJobs
$
cores
)
allJobs
=
allJobs
[
order
(
allJobs
$
startTime
,
decreasing
=
F
),]
allJobs
$
index
=
1
:
nrow
(
allJobs
)
minTime
=
min
(
allJobs
$
startTime
)
allJobs
$
relStartTime
=
allJobs
$
startTime
-
minTime
allJobs
$
relDoneTime
=
allJobs
$
doneTime
-
minTime
times
=
sort
(
c
(
allJobs
$
relStartTime
,
allJobs
$
relDoneTime
))
countJobs
<-
function
(
p
)
{
s
=
allJobs
$
relStartTime
e
=
allJobs
$
relDoneTime
cpu
=
allJobs
$
cores
x
=
c
()
# I wish I knew how to make this work with apply
for
(
time
in
times
)
x
=
c
(
x
,
sum
(
p
(
s
,
e
,
time
)
*
cpu
))
x
}
pending
=
countJobs
(
function
(
s
,
e
,
t
)
s
>
t
)
done
=
countJobs
(
function
(
s
,
e
,
t
)
e
<
t
)
running
=
nJobs
-
pending
-
done
d
=
data.frame
(
times
=
times
,
running
=
running
)
p
<-
ggplot
(
data
=
melt
(
d
,
id.vars
=
c
(
"times"
)),
aes
(
x
=
times
,
y
=
value
,
color
=
variable
))
p
<-
p
+
facet_grid
(
variable
~
.
,
scales
=
"free"
)
p
<-
p
+
geom_line
(
size
=
2
)
p
<-
p
+
xlab
(
paste
(
"Time since start of first job"
,
RUNTIME_UNITS
))
p
<-
p
+
ggtitle
(
"Cores used in time"
)
print
(
p
)
}
#
# Creates tables for each job in this group
#
...
...
@@ -113,13 +150,13 @@ plotGroup <- function(groupTable) {
sub
=
sub
[
order
(
sub
$
iteration
,
sub
$
jobName
,
decreasing
=
F
),
]
# create a table showing each job and all annotations
textplot
(
sub
,
show.rownames
=
F
)
title
(
paste
(
"Job summary for"
,
name
,
"full itemization"
),
cex
=
3
)
#
textplot(sub, show.rownames=F)
#
title(paste("Job summary for", name, "full itemization"), cex=3)
# create the table for each combination of values in the group, listing iterations in the columns
sum
=
cast
(
melt
(
sub
,
id.vars
=
groupAnnotations
,
measure.vars
=
c
(
"runtime"
)),
...
~
iteration
,
fun.aggregate
=
mean
)
textplot
(
as.data.frame
(
sum
),
show.rownames
=
F
)
title
(
paste
(
"Job summary for"
,
name
,
"itemizing each iteration"
),
cex
=
3
)
#
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
#
textplot(as.data.frame(sum), show.rownames=F)
#
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# histogram of job times by groupAnnotations
if
(
length
(
groupAnnotations
)
==
1
&&
dim
(
sub
)[
1
]
>
1
)
{
...
...
@@ -131,14 +168,14 @@ plotGroup <- function(groupTable) {
}
# as above, but averaging over all iterations
groupAnnotationsNoIteration
=
setdiff
(
groupAnnotations
,
"iteration"
)
if
(
dim
(
sub
)[
1
]
>
1
)
{
try
({
# need a try here because we will fail to reduce when there's just a single iteration
sum
=
cast
(
melt
(
sub
,
id.vars
=
groupAnnotationsNoIteration
,
measure.vars
=
c
(
"runtime"
)),
...
~
.
,
fun.aggregate
=
c
(
mean
,
sd
))
textplot
(
as.data.frame
(
sum
),
show.rownames
=
F
)
title
(
paste
(
"Job summary for"
,
name
,
"averaging over all iterations"
),
cex
=
3
)
},
silent
=
T
)
}
#
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
#
if ( dim(sub)[1] > 1 ) {
#
try({ # need a try here because we will fail to reduce when there's just a single iteration
#
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
#
textplot(as.data.frame(sum), show.rownames=F)
#
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
#
}, silent=T)
#
}
}
# print out some useful basic information
...
...
@@ -147,6 +184,7 @@ print(paste("Project :", inputFileName))
convertUnits
<-
function
(
gatkReportData
)
{
convertGroup
<-
function
(
g
)
{
if
(
is.null
(
g
$
cores
))
{
g
$
cores
=
1
}
g
$
runtime
=
g
$
runtime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
$
startTime
=
g
$
startTime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
$
doneTime
=
g
$
doneTime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
...
...
@@ -195,7 +233,8 @@ mergeScattersForAnalysis <- function(table) {
intermediate
=
intermediate
[
1
],
startTime
=
min
(
startTime
),
doneTime
=
min
(
startTime
)
+
sum
(
runtime
),
runtime
=
sum
(
runtime
))
runtime
=
sum
(
runtime
),
cores
=
min
(
cores
))
}
mergeScatters
<-
function
(
report
)
{
...
...
@@ -218,18 +257,26 @@ if ( ! is.na(outputPDF) ) {
plotJobsGantt
(
gatkReportData
,
T
,
"All jobs, by analysis, by start time"
,
F
)
plotJobsGantt
(
gatkReportData
,
F
,
"All jobs, sorted by start time"
,
F
)
plotProgressByTime
(
gatkReportData
)
plotCoresByTime
(
gatkReportData
)
# plots summarizing overall costs, merging scattered counts
merged.by.scatter
=
mergeScatters
(
gatkReportData
)
plotJobsGantt
(
merged.by.scatter
,
F
,
"Jobs merged by scatter by start time"
,
T
)
merged.as.df
=
do.call
(
rbind.data.frame
,
merged.by.scatter
)[,
c
(
"analysisName"
,
"runtime"
)]
merged.as.df
$
percent
=
merged.as.df
$
runtime
/
sum
(
merged.as.df
$
runtime
)
*
100
merged.as.df.formatted
=
data.frame
(
analysisName
=
merged.as.df
$
analysisName
,
runtime
=
prettyNum
(
merged.as.df
$
runtime
),
percent
=
prettyNum
(
merged.as.df
$
percent
,
digits
=
2
))
merged.as.df
=
do.call
(
rbind.data.frame
,
merged.by.scatter
)[,
c
(
"analysisName"
,
"runtime"
,
"cores"
)]
merged.as.df
$
cputime
=
merged.as.df
$
runtime
*
merged.as.df
$
cores
merged.as.df
$
percent
=
merged.as.df
$
cputime
/
sum
(
merged.as.df
$
cputime
)
*
100
merged.as.df.formatted
=
data.frame
(
analysisName
=
merged.as.df
$
analysisName
,
walltime
=
prettyNum
(
merged.as.df
$
runtime
),
cores
=
merged.as.df
$
cores
,
cputime
=
prettyNum
(
merged.as.df
$
cputime
),
percent
=
prettyNum
(
merged.as.df
$
percent
,
digits
=
2
))
textplot
(
merged.as.df.formatted
[
order
(
merged.as.df
$
runtime
),],
show.rownames
=
F
)
title
(
"Total runtime for each analysis"
)
plotTimeByHost
(
gatkReportData
)
#
plotTimeByHost(gatkReportData)
for
(
group
in
gatkReportData
)
{
#print(group)
plotGroup
(
group
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment