Commit 5ca76501 authored by bow

Merge branch 'feature-cnvpipeline' into 'develop'

Feature cnvpipeline

Work in progress

* [ ] - ConiferExome
* [ ] - ConiferExome Testing
* [x] - FreeC
* [ ] - FreeC testing
* [ ] - cnMOPS
* [ ] - cnMOPS testing
* [ ] - TITAN

Moved to next release:

* [ ] - HMMCopy
* [ ] - HMMCopy testing

See merge request !100
parents 0eeaa7b5 bb66d720
Showing with 1055 additions and 166 deletions
@@ -26,7 +26,6 @@ import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
import nl.lumc.sasc.biopet.pipelines.shiva.Shiva
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
option_list <- list(
make_option(c("--rawoutput"), dest="rawoutput"),
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
bamFile <- args
BAMFiles <- c(bamFile)
bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=1000, parallel=opt$threads)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
res <- cn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)
write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE)
write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
segplot(res,sampleIdx=i)
dev.off()
}
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
plot(res,which=i,toFile=TRUE)
dev.off()
}
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
option_list <- list(
make_option(c("--rawoutput"), dest="rawoutput"),
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--targetBed"), dest="targetBed"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
bamFile <- args
BAMFiles <- c(bamFile)
### WES Specific code
segments <- read.table(opt$targetBed, sep="\t", as.is=TRUE)
# filter the segments by the requested chromosome
segments <- segments[ segments[,1] == chromosome, ]
gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3]))
### END WES Specific code
bamDataRanges <- getSegmentReadCountsFromBAM(BAMFiles, GR=gr, mode="paired", parallel=opt$threads)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
res <- exomecn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)
write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE)
write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
segplot(res,sampleIdx=i)
dev.off()
}
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
plot(res,which=i,toFile=TRUE)
dev.off()
}
library('optparse')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load Data
#
dataTable <-read.table(opt$input, header=TRUE);
BAF<-data.frame(dataTable)
chromosomes <- levels(dataTable$Chromosome)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
png(filename = opt$output, width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
par(mar=plot_margins)
par(mgp=label_positions)
for (i in chromosomes) {
tt <- which(BAF$Chromosome==i)
if (length(tt)>0){
lBAF <-BAF[tt,]
plot(lBAF$Position,
lBAF$BAF,
ylim = c(-0.1,1.1),
xlab = paste ("position, chr",i),
ylab = "BAF",
pch = ".",
col = colors()[1])
tt <- which(lBAF$A==0.5)
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[92])
tt <- which(lBAF$A!=0.5 & lBAF$A>=0)
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[62])
tt <- 1
pres <- 1
if (length(lBAF$A)>4) {
for (j in c(2:(length(lBAF$A)-pres-1))) {
if (lBAF$A[j]==lBAF$A[j+pres]) {
tt[length(tt)+1] <- j
}
}
points(lBAF$Position[tt],lBAF$A[tt],pch = ".",col = colors()[24],cex=4)
points(lBAF$Position[tt],lBAF$B[tt],pch = ".",col = colors()[24],cex=4)
}
tt <- 1
pres <- 1
if (length(lBAF$FittedA)>4) {
for (j in c(2:(length(lBAF$FittedA)-pres-1))) {
if (lBAF$FittedA[j]==lBAF$FittedA[j+pres]) {
tt[length(tt)+1] <- j
}
}
points(lBAF$Position[tt],lBAF$FittedA[tt],pch = ".",col = colors()[463],cex=4)
points(lBAF$Position[tt],lBAF$FittedB[tt],pch = ".",col = colors()[463],cex=4)
}
}
}
dev.off()
library('optparse')
library('naturalsort')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-m", "--mappability"), dest="mappability"),
make_option(c("-p", "--ploidy"), default=2, type="integer", dest="ploidy"),
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load mappability track
#
mappabilityFile <- opt$mappability
mappabilityTrack <- read.table(mappabilityFile, header=FALSE, col.names=c("chrom", "start", "end", "score"))
mappabilityTrack$Start <- mappabilityTrack$start+1
mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom)
#
# Load Data
#
dataTable <- read.table( opt$input , header=TRUE)
input_ratio <- data.frame(dataTable)
chromosomes <- naturalsort(levels(input_ratio$Chromosome))
input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels=chromosomes, ordered=T)
sorted_ratio <- input_ratio[order(input_ratio$Chromosome),]
ratio <- merge(sorted_ratio, mappabilityTrack, sort=TRUE)
ratio <- ratio[order(ratio$Chromosome, ratio$Start),]
ploidy <- opt$ploidy
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
maxLevelToPlot <- 3
for (i in c(1:length(ratio$Ratio))) {
if (ratio$Ratio[i]>maxLevelToPlot) {
ratio$Ratio[i]=maxLevelToPlot
}
}
#
# Plot the graphs per chromosome
#
for (i in chromosomes) {
png(filename = paste(opt$output, ".", i,".png",sep=""), width = 4 * ppi, height = 2.5 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
tt <- which(ratio$Chromosome==i)
if (length(tt)>0) {
plot(ratio$Start[tt],
ratio$Ratio[tt]*ploidy,
ylim = c(0,maxLevelToPlot*ploidy),
xlab = paste ("position, chr",i),
ylab = "normalized CN",
pch = ".",
col = colors()[88])
title(outer=TRUE)
tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy )
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136])
tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4)
tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461])
tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL:
#points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4)
}
#tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT:
#points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4)
dev.off()
}
png(filename = paste(opt$output, ".png",sep=""), width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
par(mar=plot_margins)
par(mgp=label_positions)
for (i in chromosomes) {
tt <- which(ratio$Chromosome==i)
if (length(tt)>0) {
plot(ratio$Start[tt],
ratio$Ratio[tt]*ploidy,
ylim = c(0,maxLevelToPlot*ploidy),
xlab = paste ("position, chr",i),
ylab = "normalized CN",
pch = ".",
col = colors()[88])
tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy )
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136])
tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4)
tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461])
tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL:
#points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4)
}
#tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT:
#points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4)
}
dev.off()
# Export the whole genome graph
png(filename = paste(opt$output, ".wg.png",sep=""), width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
plot_margins <- c(3,4,2,2)+0.1
label_positions <- c(2,0.5,0)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
par(xaxs="i", yaxs="i")
maxLevelToPlot <- 3
for (i in c(1:length(ratio$Ratio))) {
if (ratio$Ratio[i]>maxLevelToPlot) {
ratio$Ratio[i]=maxLevelToPlot
}
}
for (i in c(1:length(ratio$Start))) {
ratio$Position[i] = (i-1) *5000 +1
}
plotRatioLT <- 0.10
filteredSet <- ratio[ ratio$score > plotRatioLT, ]
plot(filteredSet$Position,
filteredSet$Ratio*ploidy,
ylim = c(0,maxLevelToPlot*ploidy),
xlab = paste ("Chr. on genome"),
ylab = "normalized CN",
pch = ".",
col = colors()[88])
title(outer=TRUE)
tt <- which(filteredSet$CopyNumber>ploidy)
points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[136])
tt <- which(filteredSet$Ratio==maxLevelToPlot & filteredSet$CopyNumber>ploidy)
points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4)
tt <- which(filteredSet$CopyNumber<ploidy & filteredSet$CopyNumber!= -1)
points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[461], bg="black")
for (chrom in chromosomes) {
tt <- which(filteredSet$Chromosome == chrom)
print(filteredSet[tt[1],])
xpos <- filteredSet$Position[tt][1]
abline(v=xpos, col="grey")
axis(3, at=xpos, labels=chrom , las=2)
}
dev.off()
library(rtracklayer)
library('optparse')
library(GenomicRanges);
library(IRanges);
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-c", "--cnv"), dest="cnv"),
make_option(c("-r", "--ratios"), dest="ratios"),
make_option(c("-o", "--output"), dest="output")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
dataTable <-read.table(opt$ratios, header=TRUE);
ratio<-data.frame(dataTable)
dataTable <- read.table(opt$cnv, header=FALSE)
cnvs<- data.frame(dataTable)
ratio$Ratio[which(ratio$Ratio==-1)]=NA
cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3]))
ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio)
overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed)
normals <- setdiff(ratio.bed,cnvs.bed)
normals <- subsetByOverlaps(ratio.bed,normals)
#mu <- mean(score(normals),na.rm=TRUE)
#sigma<- sd(score(normals),na.rm=TRUE)
#hist(score(normals),n=500,xlim=c(0,2))
#hist(log(score(normals)),n=500,xlim=c(-1,1))
#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
#qqline(score(normals)[which(!is.na(score(normals)))], col = 2)
#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)
numberOfCol=length(cnvs)
for (i in c(1:length(cnvs[,1]))) {
values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i]))
#wilcox.test(values,mu=mu)
W <- function(values,normals){resultw <- try(wilcox.test(values,score(normals)), silent = TRUE)
if(class(resultw)=="try-error") return(list("statistic"=NA,"parameter"=NA,"p.value"=NA,"null.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultw}
KS <- function(values,normals){resultks <- try(ks.test(values,score(normals)), silent = TRUE)
if(class(resultks)=="try-error") return(list("statistic"=NA,"p.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultks}
#resultks <- try(KS <- ks.test(values,score(normals)), silent = TRUE)
# if(class(resultks)=="try-error") NA) else resultks
cnvs[i,numberOfCol+1]=W(values,normals)$p.value
cnvs[i,numberOfCol+2]=KS(values,normals)$p.value
}
if (numberOfCol==5) {
names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==7) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==9) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
write.table(cnvs, file=opt$output, sep="\t",quote=F,row.names=F)
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2015 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Wrapper for the Cnmops command line tool.
* Written based on Cnmops version v2.2.1.
*/
class Cnmops(val root: Configurable) extends RscriptCommandLineFunction {
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/cnmops.R")
@Input(doc = "Input file BAM", required = true)
var input: List[File] = List()
// output files, computed automatically from output directory
@Output(doc = "Output CNV file")
private lazy val outputCnv: File = {
require(outputDir != null, "Unexpected error when trying to set cn.MOPS CNV output")
new File(outputDir, "cnv.txt")
}
@Output(doc = "Output CNR file")
private lazy val outputCnr: File = {
require(outputDir != null, "Unexpected error when trying to set cn.MOPS CNR output")
new File(outputDir, "cnr.txt")
}
/** write all output files to this directory [./] */
var outputDir: String = _
override def beforeGraph = {
super.beforeGraph
require(!outputDir.isEmpty, "Outputdir for cn.MOPS should not be empty")
require(input.length >= 2, "Please supply at least 2 BAM files for cn.MOPS")
}
override def cmdLine = super.cmdLine +
required(input.map(_.getAbsolutePath).mkString(" "))
}
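A minimal usage sketch of the wrapper above, assuming it is wired up from a QScript's biopetScript(); the BAM paths and output sub-directory are made-up examples, not part of this merge request:

// Hypothetical wiring; the file names are illustrative only.
val cnmops = new Cnmops(this)
cnmops.input = List(new File("/data/sample1.bam"), new File("/data/sample2.bam"))
cnmops.outputDir = new File(outputDir, "cnmops").getAbsolutePath
add(cnmops)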
@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
abstract class Conifer extends PythonCommandLineFunction with Version {
override def subPath = "conifer" :: super.subPath
// executable = config("exe", default = "conifer")
setPythonScript(config("script", default = "conifer"))
setPythonScript(config("script", default = "conifer.py", namespace = "conifer"))
def versionRegex = """(.*)""".r
override def versionExitcode = List(0)
def versionCommand = executable + " " + pythonScript + " --version"
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.freec
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline._
class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
@Input(doc = "BAMfile", required = true)
var input: File = _
var inputFormat: Option[String] = config("inputFormat")
var outputPath: File = _
@Output(doc = "Output", shortName = "out")
protected var output: File = _
@Output(doc = "FreeC GC_profile")
private var _gcProfile: File = _
def gcProfile = new File(outputPath, "GC_profile.cnp")
@Output(doc = "FreeC Read numbers per bin")
private var _sampleBins: File = _
def sampleBins = new File(outputPath, input.getName + "_sample.cpn")
@Output
private var _cnvOutput: File = _
def cnvOutput = new File(outputPath, input.getName + "_CNVs")
@Output
private var _bafOutput: File = _
def bafOutput = new File(outputPath, input.getName + "_BAF.txt")
@Output
private var _ratioOutput: File = _
def ratioOutput = new File(outputPath, input.getName + "_ratio.txt")
@Output
private var _ratioBedGraph: File = _
def ratioBedGraph = new File(outputPath, input.getName + "_ratio.BedGraph")
executable = config("exe", default = "freec")
var bedGraphOutput: Boolean = config("BedGraphOutput", default = false)
var _bedtools: File = config("exe", default = "bedtools", namespace = "bedtools")
var bedtools: Option[String] = config("bedtools", default = _bedtools, freeVar = false)
var breakPointThreshold: Option[Double] = config("breakPointThreshold")
var breakPointType: Option[Int] = config("breakPointType")
var chrFiles: File = config("chrFiles")
var chrLenFile: File = config("chrLenFile")
var coefficientOfVariation: Option[Double] = config("coefficientOfVariation")
var contamination: Option[Double] = config("contamination")
var contaminationAdjustment: Boolean = config("contaminationAdjustment", default = false)
var degree: Option[String] = config("degree")
var forceGCcontentNormalization: Option[Int] = config("forceGCcontentNormalization")
var gcContentProfile: Option[File] = config("GCcontentProfile")
var gemMappabilityFile: Option[String] = config("gemMappabilityFile")
var intercept: Option[Int] = config("intercept")
var minCNAlength: Option[Int] = config("minCNAlength")
var minMappabilityPerWindow: Option[Double] = config("minMappabilityPerWindow")
var minExpectedGC: Option[Double] = config("minExpectedGC")
var maxExpectedGC: Option[Double] = config("maxExpectedGC")
var minimalSubclonePresence: Option[Int] = config("minimalSubclonePresence")
var maxThreads: Int = getThreads
var noisyData: Boolean = config("noisyData", default = false)
//var outputDir: File
var ploidy: Option[String] = config("ploidy")
var printNA: Boolean = config("printNA", default = false)
var readCountThreshold: Option[Int] = config("readCountThreshold")
var _sambamba: File = config("exe", namespace = "sambamba", default = "sambamba")
var sambamba: File = config("sambamba", default = _sambamba, freeVar = false)
var sambambaThreads: Option[Int] = config("SambambaThreads")
var _samtools: File = config("exe", namespace = "samtools", default = "samtools")
var samtools: File = config("samtools", default = _samtools, freeVar = false)
var sex: Option[String] = config("sex")
var step: Option[Int] = config("step")
var telocentromeric: Option[Int] = config("telocentromeric")
var uniqueMatch: Boolean = config("uniqueMatch", default = false)
var window: Option[Int] = config("window")
/** [sample] options */
// var mateFile: File = input
var mateCopyNumberFile: Option[File] = config("mateCopyNumberFile")
// var inputFormat: Option[String] = config("inputFormat")
var mateOrientation: Option[String] = config("mateOrientation")
/** [BAF] options */
var snpFile: Option[File] = config("snpFile")
var minimalCoveragePerPosition: Option[Int] = config("minimalCoveragePerPosition")
var makePileup: Option[File] = config("makePileup")
var fastaFile: Option[File] = config("fastaFile")
var minimalQualityPerPosition: Option[Int] = config("minimalQualityPerPosition")
var shiftInQuality: Option[Int] = config("shiftInQuality")
/** [target] */
var captureRegions: Option[File] = config("captureRegions")
// Control-FREEC v8.7 : calling copy number alterations and LOH regions using deep-sequencing data
override def versionCommand = executable
override def versionRegex = """Control-FREEC v([0-9\.]+) : .*""".r
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
private var configFile: File = _
override def beforeGraph {
super.beforeGraph
_gcProfile = gcProfile
_sampleBins = sampleBins
_cnvOutput = cnvOutput
_bafOutput = bafOutput
_ratioOutput = ratioOutput
_ratioBedGraph = ratioBedGraph
configFile = new File(outputPath, input.getName + ".freec_config.txt")
output = cnvOutput
}
override def beforeCmd: Unit = {
super.beforeCmd
outputPath.mkdirs()
logger.info("Creating FREEC config file: " + configFile.getAbsolutePath)
createConfigFile
}
protected def createConfigFile = {
val writer = new PrintWriter(configFile)
val conf: String = "[general]" + "\n" +
conditional(bedGraphOutput, "BedGraphOutput=TRUE", escape = false) + "\n" +
required("bedtools=", bedtools, spaceSeparated = false, escape = false) + "\n" +
optional("breakPointThreshold=", breakPointThreshold, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("breakPointType=", breakPointType, suffix = "", spaceSeparated = false, escape = false) + "\n" +
required("chrFiles=", chrFiles, spaceSeparated = false, escape = false) + "\n" +
required("chrLenFile=", chrLenFile, spaceSeparated = false, escape = false) + "\n" +
optional("coefficientOfVariation=", coefficientOfVariation, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("contamination=", contamination, suffix = "", spaceSeparated = false, escape = false) + "\n" +
conditional(contaminationAdjustment, "contaminationAdjustment=TRUE", escape = false) + "\n" +
optional("degree=", degree, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("forceGCcontentNormalization=", forceGCcontentNormalization, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("GCcontentProfile=", gcContentProfile, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("gemMappabilityFile=", gemMappabilityFile, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("intercept=", intercept, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("minCNAlength=", minCNAlength, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("minMappabilityPerWindow=", minMappabilityPerWindow, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("minExpectedGC=", minExpectedGC, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("maxExpectedGC=", maxExpectedGC, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("minimalSubclonePresence=", minimalSubclonePresence, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("maxThreads=", getThreads, suffix = "", spaceSeparated = false, escape = false) + "\n" +
conditional(noisyData, "noisyData=TRUE", escape = false) + "\n" +
required("outputDir=", outputPath, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("ploidy=", ploidy, suffix = "", spaceSeparated = false, escape = false) + "\n" +
conditional(printNA, "printNA=TRUE", escape = false) + "\n" +
optional("readCountThreshold=", readCountThreshold, suffix = "", spaceSeparated = false, escape = false) + "\n" +
required("sambamba=", sambamba, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("SambambaThreads=", sambambaThreads, suffix = "", spaceSeparated = false, escape = false) + "\n" +
required("samtools=", samtools, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("sex=", sex, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("step=", step, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("telocentromeric=", telocentromeric, suffix = "", spaceSeparated = false, escape = false) + "\n" +
conditional(uniqueMatch, "uniqueMatch=TRUE", escape = false) + "\n" +
optional("window=", window, suffix = "", spaceSeparated = false, escape = false) + "\n" +
"[sample]" + "\n" +
required("mateFile=", input, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("mateCopyNumberFile=", mateCopyNumberFile, suffix = "", spaceSeparated = false, escape = false) + "\n" +
required("inputFormat=", inputFormat, suffix = "", spaceSeparated = false, escape = false) + "\n" +
required("mateOrientation=", mateOrientation, suffix = "", spaceSeparated = false, escape = false) + "\n" +
"[BAF]" + "\n" +
optional("SNPfile=", snpFile, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("minimalCoveragePerPosition=", minimalCoveragePerPosition, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("makePileup=", makePileup, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("fastaFile=", fastaFile, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("minimalQualityPerPosition=", minimalQualityPerPosition, suffix = "", spaceSeparated = false, escape = false) + "\n" +
optional("shiftInQuality=", shiftInQuality, suffix = "", spaceSeparated = false, escape = false) + "\n" +
"[target]" + "\n" +
optional("captureRegions=", captureRegions, suffix = "", spaceSeparated = false, escape = false) + "\n"
writer.write(conf)
writer.close()
}
def cmdLine = required(executable) +
required("--conf", configFile)
}
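For reference, createConfigFile above emits a Control-FREEC style INI file. A hedged sketch of what the generated file could look like; all paths and values below are placeholders, not taken from this merge request, and the exact set of lines depends on which config values are set:

[general]
bedtools=bedtools
chrFiles=/path/to/chr_files/
chrLenFile=/path/to/reference.len
maxThreads=4
outputDir=/path/to/output/sample.bam/
sambamba=sambamba
samtools=samtools
[sample]
mateFile=/path/to/sample.bam
inputFormat=BAM
mateOrientation=FR
[BAF]
[target]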
package nl.lumc.sasc.biopet.extensions.freec
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class FreeCAssessSignificancePlot(val root: Configurable) extends RscriptCommandLineFunction {
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_assess_significance.R")
@Input(doc = "Output file from FreeC. *_CNV", required = true)
var cnv: File = null
@Input(doc = "Output file from FreeC. *_ratio.txt", required = true)
var ratios: File = null
@Output(doc = "Destination for the PNG file", required = true)
var output: File = null
/* cmdLine to execute R-script and with arguments
* Arguments should be pasted in the same order as the script is expecting it.
* Unless some R library is used for named arguments
* */
override def cmdLine: String = {
super.cmdLine +
required("--cnv", cnv) +
required("--ratios", ratios) +
required("--output", output)
}
}
package nl.lumc.sasc.biopet.extensions.freec
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class FreeCBAFPlot(val root: Configurable) extends RscriptCommandLineFunction {
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_BAFPlot.R")
@Input(doc = "Output file from FreeC. *_BAF.txt")
var input: File = null
@Output(doc = "Destination for the PNG file")
var output: File = null
/* cmdLine to execute R-script and with arguments
* Arguments should be pasted in the same order as the script is expecting it.
* Unless some R library is used for named arguments
* */
override def cmdLine: String = {
super.cmdLine +
required("-i", input) +
required("-o", output)
}
}
package nl.lumc.sasc.biopet.extensions.freec
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction {
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R")
@Input(doc = "Output file from FreeC. *_CNV", required = true)
var input: File = null
@Output(doc = "Destination for the PNG file", required = true)
var output: File = null
/**
* cmdLine to execute R-script and with arguments
* Arguments should be pasted in the same order as the script is expecting it.
* Unless some R library is used for named arguments
*/
override def cmdLine = super.cmdLine +
required("-i", input) +
required("-o", output)
}
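A hypothetical sketch, not taken from this merge request, of how the FreeC outputs defined earlier could feed these three plot wrappers from within a QScript; the variable freec and the output file names are assumptions for illustration:

// Assumes a FreeC instance named `freec` was configured and add()-ed earlier in the same QScript.
val sigPlot = new FreeCAssessSignificancePlot(this)
sigPlot.cnv = freec.cnvOutput
sigPlot.ratios = freec.ratioOutput
sigPlot.output = new File(outputDir, "freec_significance.txt")
add(sigPlot)

val cnvPlot = new FreeCCNVPlot(this)
cnvPlot.input = freec.ratioOutput
cnvPlot.output = new File(outputDir, "freec_cnv")
add(cnvPlot)

val bafPlot = new FreeCBAFPlot(this)
bafPlot.input = freec.bafOutput
bafPlot.output = new File(outputDir, "freec_baf.png")
add(bafPlot)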
@@ -13,55 +13,31 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
-package nl.lumc.sasc.biopet.pipelines.kopisu
+package nl.lumc.sasc.biopet.extensions.sambamba
-import java.io.{ BufferedWriter, File, FileWriter }
+import java.io.File
-import argonaut._
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.function.InProcessFunction
 import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
-import scala.io.Source
+class SambambaMpileup(val root: Configurable) extends Sambamba {
+  override val defaultThreads = 4
-class ConiferSummary(val root: Configurable) extends InProcessFunction with Configurable {
-  def filterCalls(callFile: File, outFile: File, sampleName: String): Unit = {
-    // val filename = callFile.getAbsolutePath
-    val writer = new BufferedWriter(new FileWriter(outFile))
+  @Input(doc = "Bam File")
+  var input: List[File] = Nil
-    for (line <- Source.fromFile(callFile).getLines()) {
-      line.startsWith(sampleName) || line.startsWith("sampleID") match {
-        case true => writer.write(line + "\n");
-        case _    =>
-      }
-    }
-    writer.close()
-  }
-  this.analysisName = getClass.getSimpleName
-  @Input(doc = "deps")
-  var deps: List[File] = Nil
-  @Output(doc = "Summary output", required = true)
-  var out: File = _
-  @Input(doc = "calls")
-  var calls: File = _
-  var label: String = _
-  var coniferPipeline: ConiferPipeline = root match {
-    case pipeline: ConiferPipeline => pipeline
-    case _ =>
-      throw new IllegalStateException("Root is no instance of ConiferPipeline")
-  }
+  @Output(doc = "Output file", required = false)
+  var output: File = null
-  var resources: Map[String, Json] = Map()
+  val buffer: Option[Int] = config("buffer", default = 8 * 1024 * 1024)
-  override def run() {
-    logger.debug("Start")
-    filterCalls(calls, out, label)
-    logger.debug("Stop")
+  def cmdLine = {
+    required(executable) +
+      required("mpileup") +
+      optional("-t", threads) +
+      optional("-b", buffer) +
+      repeat(input) + " | " +
+      "pigz -9 -p " + threads + " -i -c > " +
+      output.getAbsolutePath
   }
 }
@@ -27,6 +27,7 @@ object BiopetExecutableMain extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics,
nl.lumc.sasc.biopet.pipelines.sage.Sage,
nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig,
nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu,
nl.lumc.sasc.biopet.pipelines.carp.Carp,
nl.lumc.sasc.biopet.pipelines.toucan.Toucan,
nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling,
......
@@ -33,8 +33,7 @@ trait BiopetExecutable extends Logging {
val modules: Map[String, List[MainCommand]] = Map(
"pipeline" -> pipelines,
"tool" -> tools
)
"tool" -> tools)
/**
* @param args the command line arguments
......
@@ -43,5 +43,17 @@
<artifactId>BiopetExtensions</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu
import java.io.File
import nl.lumc.sasc.biopet.utils.config._
import nl.lumc.sasc.biopet.core.{ PipelineCommand, _ }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM }
import org.broadinstitute.gatk.queue.QScript
class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript {
/**
 * Kopisu - ConiferPipeline is a pipeline that can run standalone
 */
def this() = this(null)
/** Input bamfile */
@Input(doc = "Bamfile to start from", fullName = "bam", shortName = "bam", required = true)
var inputBam: File = _
@Argument(doc = "Label this sample with a name/ID [0-9a-zA-Z] and [-_]",
fullName = "label",
shortName = "label", required = false)
var sampleLabel: String = _
/** Exon definitions in bed format */
@Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = false)
var probeFile: File = config("probeFile")
@Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = false)
var controlsDir: File = config("controlsDir")
@Argument(doc = "Enable RPKM only mode, generate files for reference db", shortName = "rpkmonly", required = false)
var RPKMonly: Boolean = false
val summary = new ConiferSummary(this)
def init() {
}
def input2RPKM(inputBam: File): String = {
if (!sampleLabel.isEmpty) sampleLabel ++ ".txt"
else swapExt(inputBam.getName, ".bam", ".txt")
}
def input2HDF5(inputBam: File): String = {
if (!sampleLabel.isEmpty) sampleLabel ++ ".hdf5"
else swapExt(inputBam.getName, ".bam", ".hdf5")
}
def input2Calls(inputBam: File): String = {
if (!sampleLabel.isEmpty) sampleLabel ++ ".calls.txt"
else swapExt(inputBam.getName, ".bam", "calls.txt")
}
def biopetScript(): Unit = {
/** Setup RPKM directory */
val sampleDir: String = outputDir
val RPKMdir: File = new File(sampleDir + File.separator + "RPKM" + File.separator)
RPKMdir.mkdir()
val coniferRPKM = new ConiferRPKM(this)
coniferRPKM.bamFile = this.inputBam.getAbsoluteFile
coniferRPKM.probes = this.probeFile
coniferRPKM.output = new File(RPKMdir, input2RPKM(inputBam))
add(coniferRPKM)
if (!RPKMonly) {
/** Collect the rpkm_output to a temp directory, where we merge with the control files */
var refRPKMlist: List[File] = Nil
// Sync the .txt only, these files contain the RPKM Values
for (controlRPKMfile <- controlsDir.list.filter(_.toLowerCase.endsWith(".txt"))) {
val target = new File(RPKMdir, controlRPKMfile)
val source = new File(controlsDir, controlRPKMfile)
if (!target.exists) {
add(Ln(this, source, target, relative = false))
refRPKMlist :+= target
} else if (!target.equals(source)) {
target.delete()
add(Ln(this, source, target, relative = false))
refRPKMlist :+= target
}
}
val coniferAnalyze = new ConiferAnalyze(this)
coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist
coniferAnalyze.probes = this.probeFile
coniferAnalyze.rpkmDir = RPKMdir
coniferAnalyze.output = new File(sampleDir, input2HDF5(inputBam))
add(coniferAnalyze)
val coniferCall = new ConiferCall(this)
coniferCall.input = coniferAnalyze.output
coniferCall.output = new File(sampleDir, "calls.txt")
add(coniferCall)
summary.deps = List(coniferCall.output)
summary.label = sampleLabel
summary.calls = coniferCall.output
summary.out = new File(sampleDir, input2Calls(inputBam))
add(summary)
}
}
}
object ConiferPipeline extends PipelineCommand
@@ -15,60 +15,67 @@
*/
 package nl.lumc.sasc.biopet.pipelines.kopisu
+import nl.lumc.sasc.biopet.core.summary.SummaryQScript
+import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
+import nl.lumc.sasc.biopet.pipelines.kopisu.methods.{ ConiferMethod, FreecMethod }
+import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
 import org.broadinstitute.gatk.queue.QScript
-class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript {
+import scala.language.reflectiveCalls
+class Kopisu(val root: Configurable) extends QScript with SummaryQScript with Reference {
+  qscript =>
   def this() = this(null)
-  @Input(doc = "Input bamfile", required = true)
-  var bamFile: File = config("bam")
+  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
+  protected[kopisu] var inputBamsArg: List[File] = Nil
-  def init() {
-    if (!outputDir.endsWith("/")) outputDir += "/"
-  }
+  var inputBams: Map[String, File] = Map()
-  def biopetScript() {
-    addSamplesJobs()
+  def init(): Unit = {
+    if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
+    if (inputBams.isEmpty) Logging.addError("No input bams found")
   }
-  def summaryFile: File = new File(outputDir, "Kopisu.summary.json")
-  //TODO: Add summary
-  def summaryFiles: Map[String, File] = Map()
-  //TODO: Add summary
-  def summarySettings: Map[String, Any] = Map()
-  def makeSample(id: String) = new Sample(id)
-  class Sample(sampleId: String) extends AbstractSample(sampleId) {
-    //TODO: Add summary
-    def summaryFiles: Map[String, File] = Map()
-    //TODO: Add summary
-    def summaryStats: Map[String, Any] = Map()
+  lazy val freecMethod = if (config("use_freec_method", default = true)) {
+    Some(new FreecMethod(this))
+  } else None
-    def makeLibrary(id: String) = new Library(id)
-    class Library(libId: String) extends AbstractLibrary(libId) {
-      //TODO: Add summary
-      def summaryFiles: Map[String, File] = Map()
+  lazy val coniferMethod = if (config("use_conifer_method", default = false)) {
+    Some(new ConiferMethod(this))
+  } else None
-      //TODO: Add summary
-      def summaryStats: Map[String, Any] = Map()
-      def addJobs(): Unit = {
-        // This script is in fact FreeC only.
+  def biopetScript() {
+    if (freecMethod.isEmpty && coniferMethod.isEmpty) Logging.addError("No method selected")
-      }
+    freecMethod.foreach { method =>
+      method.inputBams = inputBams
+      method.outputDir = new File(outputDir, "freec_method")
+      add(method)
+    }
-    def addJobs(): Unit = {
+    coniferMethod.foreach { method =>
+      method.inputBams = inputBams
+      method.outputDir = new File(outputDir, "conifer_method")
+      add(method)
+    }
-  }
-  def addMultiSampleJobs(): Unit = {
+    addSummaryJobs()
   }
+  /** Must return a map with used settings for this pipeline */
+  def summarySettings: Map[String, Any] = Map(
+    "reference" -> referenceSummary,
+    "freec_method" -> freecMethod.isDefined
+  )
+  /** File to put in the summary for this pipeline */
+  def summaryFiles: Map[String, File] = inputBams.map(x => s"inputbam_${x._1}" -> x._2)
+  /** Name of summary output file */
+  def summaryFile: File = new File(outputDir, "kopisu.summary.json")
 }
object Kopisu extends PipelineCommand
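Method selection in the rewritten Kopisu is config-driven; only use_freec_method (default true) and use_conifer_method (default false) come from the code above. A hedged example of a config fragment, assuming Biopet's usual JSON config format:

{
  "use_freec_method": true,
  "use_conifer_method": false
}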
package nl.lumc.sasc.biopet.pipelines.kopisu.methods
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.Reference
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvanthof on 10/05/16.
*/
trait CnvMethod extends QScript with SummaryQScript with Reference {
/** Name of mode, this should also be used in the config */
def name: String
var namePrefix: String = name
var inputBams: Map[String, File] = Map.empty
/** Name of summary output file */
def summaryFile: File = new File(outputDir, s"$name.summary.json")
/** Must return a map with used settings for this pipeline */
def summarySettings: Map[String, Any] = Map()
/** File to put in the summary for this pipeline */
def summaryFiles: Map[String, File] = inputBams.map(x => s"inputbam_${x._1}" -> x._2)
def init() = {}
}
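A minimal sketch of how an additional CNV caller could plug into the CnvMethod trait; ExampleMethod is hypothetical and not part of this merge request, and it assumes the usual BiopetQScript members (add, logger, addSummaryJobs) are in scope via the trait:

package nl.lumc.sasc.biopet.pipelines.kopisu.methods

import nl.lumc.sasc.biopet.utils.config.Configurable

// Hypothetical example, for illustration only.
class ExampleMethod(val root: Configurable) extends CnvMethod {
  def name = "example"

  def biopetScript(): Unit = {
    inputBams.foreach {
      case (sampleName, bamFile) =>
        // A real implementation would create and add() per-sample jobs here,
        // as FreecMethod and ConiferMethod do.
        logger.info(s"example method would process $sampleName from $bamFile")
    }
    addSummaryJobs()
  }
}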
package nl.lumc.sasc.biopet.pipelines.kopisu.methods
import java.io.File
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM }
import nl.lumc.sasc.biopet.utils.config.Configurable
/**
* Created by pjvanthof on 10/05/16.
*/
class ConiferMethod(val root: Configurable) extends CnvMethod {
def name = "conifer"
/** Exon definitions in bed format */
var probeFile: File = config("probe_file")
var controlsDir: File = config("controls_dir")
/**Enable RPKM only mode, generate files for reference db */
var RPKMonly: Boolean = false
def biopetScript: Unit = {
val RPKMdir = new File(outputDir, "rpkm")
val rpkmFiles: List[File] = inputBams.map {
case (sampleName, bamFile) =>
val coniferRPKM = new ConiferRPKM(this)
coniferRPKM.bamFile = bamFile.getAbsoluteFile
coniferRPKM.probes = this.probeFile
coniferRPKM.output = new File(RPKMdir, s"$sampleName.rpkm.txt")
add(coniferRPKM)
coniferRPKM.output
}.toList ++ controlsDir.list.filter(_.toLowerCase.endsWith(".txt")).map { path =>
val oldFile = new File(controlsDir, path)
val newFile = new File(RPKMdir, s"control.${oldFile.getName}")
add(Ln(this, oldFile, newFile))
newFile
}
inputBams.foreach {
case (sampleName, bamFile) =>
val sampleDir = new File(outputDir, "samples" + File.separator + sampleName)
val coniferAnalyze = new ConiferAnalyze(this)
coniferAnalyze.deps ++= rpkmFiles
coniferAnalyze.probes = probeFile
coniferAnalyze.rpkmDir = RPKMdir
coniferAnalyze.output = new File(sampleDir, s"$sampleName.hdf5")
add(coniferAnalyze)
val coniferCall = new ConiferCall(this)
coniferCall.input = coniferAnalyze.output
coniferCall.output = new File(sampleDir, s"${sampleName}.calls.txt")
add(coniferCall)
}
addSummaryJobs()
}
override def summaryFiles = super.summaryFiles ++ Map("probe_file" -> probeFile)
}