Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
B
biopet.biopet
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Mirrors
biopet.biopet
Commits
0ef2b27c
Commit
0ef2b27c
authored
Jul 28, 2017
by
Peter van 't Hof
Committed by
GitHub
Jul 28, 2017
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #170 from biopet/fix-BIOPET-758
Adding cpu hours to the Queue job report
parents
6085a932
cccd07cf
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
68 additions
and
19 deletions
+68
-19
biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R
...urces/org/broadinstitute/gatk/queue/util/queueJobReport.R
+68
-19
No files found.
biopet-core/src/main/resources/org/broadinstitute/gatk/queue/util/queueJobReport.R
View file @
0ef2b27c
...
@@ -31,7 +31,7 @@ ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
...
@@ -31,7 +31,7 @@ ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
# Helper function to aggregate all of the jobs in the report across all tables
# Helper function to aggregate all of the jobs in the report across all tables
#
#
allJobsFromReport
<-
function
(
report
)
{
allJobsFromReport
<-
function
(
report
)
{
names
<-
c
(
"jobName"
,
"startTime"
,
"analysisName"
,
"doneTime"
,
"exechosts"
,
"runtime"
)
names
<-
c
(
"jobName"
,
"startTime"
,
"analysisName"
,
"doneTime"
,
"exechosts"
,
"runtime"
,
"cores"
)
sub
<-
lapply
(
report
,
function
(
table
)
table
[,
names
])
sub
<-
lapply
(
report
,
function
(
table
)
table
[,
names
])
do.call
(
"rbind"
,
sub
)
do.call
(
"rbind"
,
sub
)
}
}
...
@@ -102,6 +102,43 @@ plotProgressByTime <- function(gatkReport) {
...
@@ -102,6 +102,43 @@ plotProgressByTime <- function(gatkReport) {
print
(
p
)
print
(
p
)
}
}
#
# Plots the number of cores in use over time, relative to the start of the
# first job.
#
# gatkReport: the report passed straight to allJobsFromReport(); the combined
#   table it returns must provide the startTime, doneTime and cores columns
#   read below.
#
# At every job start/done timestamp the cores of all jobs that have not yet
# started ("pending") and of all jobs that have already finished ("done") are
# summed; the cores in use are the total minus those two sums. The resulting
# curve is printed as a ggplot line chart on the current graphics device.
#
# Returns NULL invisibly (without plotting) when the report has no jobs,
# otherwise the value of print(p).
#
plotCoresByTime <- function(gatkReport) {
  allJobs <- allJobsFromReport(gatkReport)

  # Nothing to draw for an empty report; the previous 1:nrow(allJobs)
  # indexing would fail here instead of skipping the plot.
  if (is.null(allJobs) || nrow(allJobs) == 0) {
    return(invisible(NULL))
  }

  # Despite the historical name "nJobs" in sibling plots, this is the total
  # number of cores requested over all jobs.
  totalCores <- sum(allJobs$cores)
  allJobs <- allJobs[order(allJobs$startTime, decreasing = FALSE), ]

  # Express all timestamps relative to the start of the first job.
  minTime <- min(allJobs$startTime)
  relStartTime <- allJobs$startTime - minTime
  relDoneTime <- allJobs$doneTime - minTime
  cores <- allJobs$cores

  # Every start and done event is a point at which the core usage may change.
  times <- sort(c(relStartTime, relDoneTime))

  # For each event time, sum the cores of the jobs selected by the predicate
  # p(start, done, time). vapply preallocates the result instead of growing
  # a vector with c() on every iteration.
  countCores <- function(p) {
    vapply(
      times,
      function(time) sum(p(relStartTime, relDoneTime, time) * cores),
      numeric(1)
    )
  }

  pending <- countCores(function(s, e, t) s > t)
  done <- countCores(function(s, e, t) e < t)
  running <- totalCores - pending - done

  d <- data.frame(times = times, running = running)

  p <- ggplot(data = melt(d, id.vars = c("times")),
              aes(x = times, y = value, color = variable))
  p <- p + facet_grid(variable ~ ., scales = "free")
  p <- p + geom_line(size = 2)
  p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
  p <- p + ggtitle("Cores used in time")
  print(p)
}
#
#
# Creates tables for each job in this group
# Creates tables for each job in this group
#
#
...
@@ -113,13 +150,13 @@ plotGroup <- function(groupTable) {
...
@@ -113,13 +150,13 @@ plotGroup <- function(groupTable) {
sub
=
sub
[
order
(
sub
$
iteration
,
sub
$
jobName
,
decreasing
=
F
),
]
sub
=
sub
[
order
(
sub
$
iteration
,
sub
$
jobName
,
decreasing
=
F
),
]
# create a table showing each job and all annotations
# create a table showing each job and all annotations
textplot
(
sub
,
show.rownames
=
F
)
#
textplot(sub, show.rownames=F)
title
(
paste
(
"Job summary for"
,
name
,
"full itemization"
),
cex
=
3
)
#
title(paste("Job summary for", name, "full itemization"), cex=3)
# create the table for each combination of values in the group, listing iterations in the columns
# create the table for each combination of values in the group, listing iterations in the columns
sum
=
cast
(
melt
(
sub
,
id.vars
=
groupAnnotations
,
measure.vars
=
c
(
"runtime"
)),
...
~
iteration
,
fun.aggregate
=
mean
)
#
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
textplot
(
as.data.frame
(
sum
),
show.rownames
=
F
)
#
textplot(as.data.frame(sum), show.rownames=F)
title
(
paste
(
"Job summary for"
,
name
,
"itemizing each iteration"
),
cex
=
3
)
#
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# histogram of job times by groupAnnotations
# histogram of job times by groupAnnotations
if
(
length
(
groupAnnotations
)
==
1
&&
dim
(
sub
)[
1
]
>
1
)
{
if
(
length
(
groupAnnotations
)
==
1
&&
dim
(
sub
)[
1
]
>
1
)
{
...
@@ -131,14 +168,14 @@ plotGroup <- function(groupTable) {
...
@@ -131,14 +168,14 @@ plotGroup <- function(groupTable) {
}
}
# as above, but averaging over all iterations
# as above, but averaging over all iterations
groupAnnotationsNoIteration
=
setdiff
(
groupAnnotations
,
"iteration"
)
#
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
if
(
dim
(
sub
)[
1
]
>
1
)
{
#
if ( dim(sub)[1] > 1 ) {
try
({
# need a try here because we will fail to reduce when there's just a single iteration
#
try({ # need a try here because we will fail to reduce when there's just a single iteration
sum
=
cast
(
melt
(
sub
,
id.vars
=
groupAnnotationsNoIteration
,
measure.vars
=
c
(
"runtime"
)),
...
~
.
,
fun.aggregate
=
c
(
mean
,
sd
))
#
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot
(
as.data.frame
(
sum
),
show.rownames
=
F
)
#
textplot(as.data.frame(sum), show.rownames=F)
title
(
paste
(
"Job summary for"
,
name
,
"averaging over all iterations"
),
cex
=
3
)
#
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
},
silent
=
T
)
#
}, silent=T)
}
#
}
}
}
# print out some useful basic information
# print out some useful basic information
...
@@ -147,6 +184,7 @@ print(paste("Project :", inputFileName))
...
@@ -147,6 +184,7 @@ print(paste("Project :", inputFileName))
convertUnits
<-
function
(
gatkReportData
)
{
convertUnits
<-
function
(
gatkReportData
)
{
convertGroup
<-
function
(
g
)
{
convertGroup
<-
function
(
g
)
{
if
(
is.null
(
g
$
cores
))
{
g
$
cores
=
1
}
g
$
runtime
=
g
$
runtime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
$
runtime
=
g
$
runtime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
$
startTime
=
g
$
startTime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
$
startTime
=
g
$
startTime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
$
doneTime
=
g
$
doneTime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
$
doneTime
=
g
$
doneTime
*
ORIGINAL_UNITS_TO_RUNTIME_UNITS
...
@@ -195,7 +233,8 @@ mergeScattersForAnalysis <- function(table) {
...
@@ -195,7 +233,8 @@ mergeScattersForAnalysis <- function(table) {
intermediate
=
intermediate
[
1
],
intermediate
=
intermediate
[
1
],
startTime
=
min
(
startTime
),
startTime
=
min
(
startTime
),
doneTime
=
min
(
startTime
)
+
sum
(
runtime
),
doneTime
=
min
(
startTime
)
+
sum
(
runtime
),
runtime
=
sum
(
runtime
))
runtime
=
sum
(
runtime
),
cores
=
min
(
cores
))
}
}
mergeScatters
<-
function
(
report
)
{
mergeScatters
<-
function
(
report
)
{
...
@@ -218,18 +257,28 @@ if ( ! is.na(outputPDF) ) {
...
@@ -218,18 +257,28 @@ if ( ! is.na(outputPDF) ) {
plotJobsGantt
(
gatkReportData
,
T
,
"All jobs, by analysis, by start time"
,
F
)
plotJobsGantt
(
gatkReportData
,
T
,
"All jobs, by analysis, by start time"
,
F
)
plotJobsGantt
(
gatkReportData
,
F
,
"All jobs, sorted by start time"
,
F
)
plotJobsGantt
(
gatkReportData
,
F
,
"All jobs, sorted by start time"
,
F
)
plotProgressByTime
(
gatkReportData
)
plotProgressByTime
(
gatkReportData
)
plotCoresByTime
(
gatkReportData
)
# plots summarizing overall costs, merging scattered counts
# plots summarizing overall costs, merging scattered counts
merged.by.scatter
=
mergeScatters
(
gatkReportData
)
merged.by.scatter
=
mergeScatters
(
gatkReportData
)
plotJobsGantt
(
merged.by.scatter
,
F
,
"Jobs merged by scatter by start time"
,
T
)
plotJobsGantt
(
merged.by.scatter
,
F
,
"Jobs merged by scatter by start time"
,
T
)
merged.as.df
=
do.call
(
rbind.data.frame
,
merged.by.scatter
)[,
c
(
"analysisName"
,
"runtime"
)]
merged.as.df
=
do.call
(
rbind.data.frame
,
merged.by.scatter
)[,
c
(
"analysisName"
,
"runtime"
,
"cores"
)]
merged.as.df
$
cputime
=
merged.as.df
$
runtime
*
merged.as.df
$
cores
merged.as.df
$
percent
=
merged.as.df
$
runtime
/
sum
(
merged.as.df
$
runtime
)
*
100
merged.as.df
$
percent
=
merged.as.df
$
runtime
/
sum
(
merged.as.df
$
runtime
)
*
100
merged.as.df.formatted
=
data.frame
(
analysisName
=
merged.as.df
$
analysisName
,
runtime
=
prettyNum
(
merged.as.df
$
runtime
),
percent
=
prettyNum
(
merged.as.df
$
percent
,
digits
=
2
))
merged.as.df
$
percentCpu
=
merged.as.df
$
cputime
/
sum
(
merged.as.df
$
cputime
)
*
100
textplot
(
merged.as.df.formatted
[
order
(
merged.as.df
$
runtime
),],
show.rownames
=
F
)
merged.as.df.formatted
=
data.frame
(
analysisName
=
merged.as.df
$
analysisName
,
walltime
=
prettyNum
(
merged.as.df
$
runtime
),
percent
=
prettyNum
(
merged.as.df
$
percent
,
digits
=
2
),
cores
=
merged.as.df
$
cores
,
cputime
=
prettyNum
(
merged.as.df
$
cputime
),
percentCpu
=
prettyNum
(
merged.as.df
$
percentCpu
,
digits
=
2
))
textplot
(
merged.as.df.formatted
[
order
(
merged.as.df
$
percentCpu
),],
show.rownames
=
F
)
title
(
"Total runtime for each analysis"
)
title
(
"Total runtime for each analysis"
)
plotTimeByHost
(
gatkReportData
)
#
plotTimeByHost(gatkReportData)
for
(
group
in
gatkReportData
)
{
for
(
group
in
gatkReportData
)
{
#print(group)
#print(group)
plotGroup
(
group
)
plotGroup
(
group
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment