Commit 0ef2b27c authored by Peter van 't Hof, committed by GitHub

Merge pull request #170 from biopet/fix-BIOPET-758

Adding cpu hours to the Queue job report
parents 6085a932 cccd07cf
...@@ -31,7 +31,7 @@ ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60 ...@@ -31,7 +31,7 @@ ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
# Helper function to aggregate all of the jobs in the report across all tables # Helper function to aggregate all of the jobs in the report across all tables
# #
allJobsFromReport <- function(report) { allJobsFromReport <- function(report) {
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime") names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime", "cores")
sub <- lapply(report, function(table) table[,names]) sub <- lapply(report, function(table) table[,names])
do.call("rbind", sub) do.call("rbind", sub)
} }
...@@ -102,6 +102,43 @@ plotProgressByTime <- function(gatkReport) { ...@@ -102,6 +102,43 @@ plotProgressByTime <- function(gatkReport) {
print(p) print(p)
} }
#
# Plots the number of cores in use over time, summed across every job in the report
#
# gatkReport: list of job tables accepted by allJobsFromReport; the columns
#   read here are startTime, doneTime and cores.
#
# Every job start and every job end is used as a sample point.  At each sample
# point the cores of jobs that have not started yet (pending) and of jobs that
# have already finished (done) are subtracted from the total core count; the
# remainder is reported as "running".
#
# The plot is printed to the current graphics device.  When the report holds
# no jobs, nothing is drawn and NULL is returned invisibly.
#
plotCoresByTime <- function(gatkReport) {
  allJobs <- allJobsFromReport(gatkReport)

  # rbind over an empty list of tables yields NULL, and min() over zero rows
  # would only produce Inf with a warning, so skip the plot in both cases
  if (is.null(allJobs) || nrow(allJobs) == 0) {
    return(invisible(NULL))
  }

  cores <- allJobs$cores
  totalCores <- sum(cores)

  # express all times relative to the start of the earliest job
  minTime <- min(allJobs$startTime)
  relStartTime <- allJobs$startTime - minTime
  relDoneTime <- allJobs$doneTime - minTime
  times <- sort(c(relStartTime, relDoneTime))

  # for each sample time, sum the cores of the jobs selected by predicate p;
  # vapply allocates the result once instead of growing it element by element
  coresMatching <- function(p) {
    vapply(
      times,
      function(time) sum(p(relStartTime, relDoneTime, time) * cores),
      numeric(1)
    )
  }

  pending <- coresMatching(function(s, e, t) s > t)
  done <- coresMatching(function(s, e, t) e < t)
  running <- totalCores - pending - done

  d <- data.frame(times = times, running = running)
  p <- ggplot(data = melt(d, id.vars = c("times")),
              aes(x = times, y = value, color = variable))
  p <- p + facet_grid(variable ~ ., scales = "free")
  p <- p + geom_line(size = 2)
  p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
  p <- p + ggtitle("Cores used in time")
  print(p)
}
# #
# Creates tables for each job in this group # Creates tables for each job in this group
# #
...@@ -113,13 +150,13 @@ plotGroup <- function(groupTable) { ...@@ -113,13 +150,13 @@ plotGroup <- function(groupTable) {
sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ] sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
# create a table showing each job and all annotations # create a table showing each job and all annotations
textplot(sub, show.rownames=F) # textplot(sub, show.rownames=F)
title(paste("Job summary for", name, "full itemization"), cex=3) # title(paste("Job summary for", name, "full itemization"), cex=3)
# create the table for each combination of values in the group, listing iterations in the columns # create the table for each combination of values in the group, listing iterations in the columns
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean) # sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
textplot(as.data.frame(sum), show.rownames=F) # textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "itemizing each iteration"), cex=3) # title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# histogram of job times by groupAnnotations # histogram of job times by groupAnnotations
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) { if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
...@@ -131,14 +168,14 @@ plotGroup <- function(groupTable) { ...@@ -131,14 +168,14 @@ plotGroup <- function(groupTable) {
} }
# as above, but averaging over all iterations # as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") # groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
if ( dim(sub)[1] > 1 ) { # if ( dim(sub)[1] > 1 ) {
try({ # need a try here because we will fail to reduce when there's just a single iteration # try({ # need a try here because we will fail to reduce when there's just a single iteration
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) # sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot(as.data.frame(sum), show.rownames=F) # textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3) # title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}, silent=T) # }, silent=T)
} # }
} }
# print out some useful basic information # print out some useful basic information
...@@ -147,6 +184,7 @@ print(paste("Project :", inputFileName)) ...@@ -147,6 +184,7 @@ print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) { convertUnits <- function(gatkReportData) {
convertGroup <- function(g) { convertGroup <- function(g) {
if (is.null(g$cores)) {g$cores = 1}
g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
...@@ -195,7 +233,8 @@ mergeScattersForAnalysis <- function(table) { ...@@ -195,7 +233,8 @@ mergeScattersForAnalysis <- function(table) {
intermediate = intermediate[1], intermediate = intermediate[1],
startTime = min(startTime), startTime = min(startTime),
doneTime = min(startTime) + sum(runtime), doneTime = min(startTime) + sum(runtime),
runtime = sum(runtime)) runtime = sum(runtime),
cores = min(cores))
} }
mergeScatters <- function(report) { mergeScatters <- function(report) {
...@@ -218,18 +257,28 @@ if ( ! is.na(outputPDF) ) { ...@@ -218,18 +257,28 @@ if ( ! is.na(outputPDF) ) {
plotJobsGantt(gatkReportData, T, "All jobs, by analysis, by start time", F) plotJobsGantt(gatkReportData, T, "All jobs, by analysis, by start time", F)
plotJobsGantt(gatkReportData, F, "All jobs, sorted by start time", F) plotJobsGantt(gatkReportData, F, "All jobs, sorted by start time", F)
plotProgressByTime(gatkReportData) plotProgressByTime(gatkReportData)
plotCoresByTime(gatkReportData)
# plots summarizing overall costs, merging scattered counts # plots summarizing overall costs, merging scattered counts
merged.by.scatter = mergeScatters(gatkReportData) merged.by.scatter = mergeScatters(gatkReportData)
plotJobsGantt(merged.by.scatter, F, "Jobs merged by scatter by start time", T) plotJobsGantt(merged.by.scatter, F, "Jobs merged by scatter by start time", T)
merged.as.df = do.call(rbind.data.frame, merged.by.scatter)[,c("analysisName", "runtime")] merged.as.df = do.call(rbind.data.frame, merged.by.scatter)[,c("analysisName", "runtime", "cores")]
merged.as.df$cputime = merged.as.df$runtime * merged.as.df$cores
merged.as.df$percent = merged.as.df$runtime / sum(merged.as.df$runtime) * 100 merged.as.df$percent = merged.as.df$runtime / sum(merged.as.df$runtime) * 100
merged.as.df.formatted = data.frame(analysisName=merged.as.df$analysisName,runtime=prettyNum(merged.as.df$runtime), percent=prettyNum(merged.as.df$percent,digits=2)) merged.as.df$percentCpu = merged.as.df$cputime / sum(merged.as.df$cputime) * 100
textplot(merged.as.df.formatted[order(merged.as.df$runtime),], show.rownames=F) merged.as.df.formatted = data.frame(
analysisName=merged.as.df$analysisName,
walltime=prettyNum(merged.as.df$runtime),
percent=prettyNum(merged.as.df$percent,digits=2),
cores=merged.as.df$cores,
cputime=prettyNum(merged.as.df$cputime),
percentCpu=prettyNum(merged.as.df$percentCpu,digits=2))
textplot(merged.as.df.formatted[order(merged.as.df$percentCpu),], show.rownames=F)
title("Total runtime for each analysis") title("Total runtime for each analysis")
plotTimeByHost(gatkReportData) #plotTimeByHost(gatkReportData)
for ( group in gatkReportData ) { for ( group in gatkReportData ) {
#print(group) #print(group)
plotGroup(group) plotGroup(group)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment