Commit 33c611e0 authored by Sander Bollen's avatar Sander Bollen
Browse files

Merge branch 'develop' into fix-bedtools_coverage_sorted

Conflicts:
	bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
parents c708df45 160ed2d3
......@@ -17,15 +17,19 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.File
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
import nl.lumc.sasc.biopet.core.annotations.{AnnotationRefFlat, RibosomalRefFlat}
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
import nl.lumc.sasc.biopet.core.{BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag}
import nl.lumc.sasc.biopet.extensions.bedtools.{BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort}
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
import org.broadinstitute.gatk.queue.QScript
class BamMetrics(val root: Configurable) extends QScript
......@@ -74,6 +78,8 @@ class BamMetrics(val root: Configurable) extends QScript
/** executed before script */
def init(): Unit = {
inputFiles :+= new InputFile(inputBam)
ampliconBedFile.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
roiBedFiles.foreach(BedCheck.checkBedFileToReference(_, referenceFasta(), biopetError = true))
}
/** Script to add jobs */
......
......@@ -61,9 +61,9 @@ class BamMetricsTest extends TestNGSuite with Matchers {
def testBamMetrics(rois: Int, amplicon: Boolean, rna: Boolean, wgs: Boolean) = {
val map = ConfigUtils.mergeMaps(Map("output_dir" -> BamMetricsTest.outputDir, "rna_metrics" -> rna, "wgs_metrics" -> wgs),
Map(BamMetricsTest.executables.toSeq: _*)) ++
(if (amplicon) Map("amplicon_bed" -> "amplicon.bed") else Map()) ++
(if (amplicon) Map("amplicon_bed" -> BamMetricsTest.ampliconBed.getAbsolutePath) else Map()) ++
(if (rna) Map("annotation_refflat" -> "transcripts.refFlat") else Map()) ++
Map("regions_of_interest" -> (1 to rois).map("roi_" + _ + ".bed").toList)
Map("regions_of_interest" -> (1 to rois).map(BamMetricsTest.roi(_).getAbsolutePath).toList)
val bammetrics: BamMetrics = initPipeline(map)
bammetrics.inputBam = BamMetricsTest.bam
......@@ -94,6 +94,14 @@ object BamMetricsTest {
val bam = new File(outputDir, "input" + File.separator + "bla.bam")
Files.touch(bam)
val ampliconBed = new File(outputDir, "input" + File.separator + "amplicon_bed.bed")
Files.touch(ampliconBed)
def roi(i: Int): File = {
val roi = new File(outputDir, "input" + File.separator + s"roi${i}.bed")
Files.touch(roi)
roi
}
private def copyFile(name: String): Unit = {
val is = getClass.getResourceAsStream("/" + name)
......
......@@ -26,7 +26,6 @@ import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins }
import nl.lumc.sasc.biopet.pipelines.shiva.Shiva
import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......
......@@ -17,6 +17,7 @@
<modules>
<module>../biopet-core</module>
<module>../generate-indexes</module>
<module>../biopet-package</module>
<module>../bammetrics</module>
<module>../flexiprep</module>
......
......@@ -73,6 +73,8 @@ class BiopetFifoPipe(val root: Configurable,
_pipesJobs :::= commands
_pipesJobs = _pipesJobs.distinct
analysisName = commands.map(_.analysisName).mkString("_")
}
override def beforeCmd(): Unit = {
......
......@@ -27,7 +27,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
*/
class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCommandLineFunction {
@Input
@Input(required = false)
lazy val input: List[File] = try {
commands.flatMap(_.inputs)
} catch {
......
......@@ -132,7 +132,7 @@ trait ReportBuilder extends ToolCommand {
logger.info("Start")
val argsParser = new OptParser
val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
require(cmdArgs.outputDir.exists(), "Output dir does not exist")
require(cmdArgs.outputDir.isDirectory, "Output dir is not a directory")
......@@ -205,7 +205,7 @@ trait ReportBuilder extends ToolCommand {
val pageOutputDir = new File(outputDir, path.mkString(File.separator))
pageOutputDir.mkdirs()
val rootPath = "./" + Array.fill(path.size)("src/main").mkString("")
val rootPath = "./" + Array.fill(path.size)("../").mkString
val pageArgs = args ++ page.args ++
Map("page" -> page,
"path" -> path,
......
......@@ -30,19 +30,21 @@ import csv
import datetime
def main(tsvfile, vcffile):
def main(tsvfile, vcffile, samplename):
'''
:param tsvfile: filename of input file.tsv
:type tsvfile: string
:param vcffile: filename of output file.vcf
:type vcffile: string
:param samplename: Name of the sample
:type samplename: string
'''
with open(tsvfile) as reader:
# Parse file
dictreader = _parse_tsvfile(reader)
# Write out file
_format_vcffile(dictreader, vcffile)
_format_vcffile(dictreader, vcffile, samplename)
def _parse_tsvfile(readable):
'''
......@@ -92,11 +94,11 @@ _tsv_fields = ('Chr1', 'Pos1', 'Orientation1',
_vcf_fields = ('CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', 'default')
_vcf_fields = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
TS_NOW = datetime.datetime.now()
VCF_HEADER = """##fileformat=VCFv4.1
VCF_HEADER = """##fileformat=VCFv4.2
##fileDate={filedate}
##source=breakdancer-max
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
......@@ -106,6 +108,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
##INFO=<ID=SVEND,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=SVMETHOD,Number=0,Type=String,Description="Program called with">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
......@@ -138,7 +141,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">""".format( filedate=TS_NOW.strftime( "%Y%m%d" ) )
def _format_vcffile(dictreader, vcffile):
def _format_vcffile(dictreader, vcffile, samplename):
'''
Create a pseudo .vcf file based on values read from DictReader instance.
:param dictreader: DictReader instance to read data from
......@@ -148,22 +151,22 @@ def _format_vcffile(dictreader, vcffile):
'''
FORMAT = "GT:DP"
with open(vcffile, mode='w') as writer:
writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields)))
writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields + [samplename])))
output_vcf = []
for line in dictreader:
CHROM = line['Chr1']
# TODO Figure out whether we have zero or one based positioning
POS = int(line['Pos1'])
ALT = '.'
ALT = '<{}>'.format(line['Type'])
SVEND = int(line['Pos2'])
INFO = 'PROGRAM=breakdancer;SVTYPE={}'.format(line['Type'])
INFO = 'SVMETHOD=breakdancer;SVTYPE={}'.format(line['Type'])
if line['Type'] not in ['CTX']:
INFO += ';SVLEN={}'.format(int(line['Size']))
INFO += ";SVEND={}".format(SVEND)
INFO += ";END={}".format(SVEND)
# write alternate ALT field for Intrachromosomal translocations
if line['Type'] in ['CTX']:
ALT = "N[{}:{}[".format(line['Chr2'], line['Pos2'])
......@@ -172,7 +175,7 @@ def _format_vcffile(dictreader, vcffile):
SAMPLEINFO = "{}:{}".format( '1/.', line['num_Reads'] )
# Create record
output_vcf.append([CHROM, POS, '.', '.', ALT, '.', 'PASS', INFO, FORMAT, SAMPLEINFO])
output_vcf.append([CHROM, POS, '.', 'N', ALT, '.', 'PASS', INFO, FORMAT, SAMPLEINFO])
# Sort all results
output_vcf.sort()
......@@ -184,9 +187,11 @@ def _format_vcffile(dictreader, vcffile):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--breakdancertsv', dest='breakdancertsv', type=str,
help='Breakdancer TSV outputfile')
help='Breakdancer TSV outputfile')
parser.add_argument('-o', '--outputvcf', dest='outputvcf', type=str,
help='Output vcf to')
help='Output vcf to')
parser.add_argument('-s', '--sample', dest='sample', type=str,
help='sample name')
args = parser.parse_args()
main(args.breakdancertsv, args.outputvcf)
main(args.breakdancertsv, args.outputvcf, args.sample)
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
option_list <- list(
make_option(c("--rawoutput"), dest="rawoutput"),
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
bamFile <- args
BAMFiles <- c(bamFile)
bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=1000, parallel=opt$threads)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
res <- cn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)
write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE)
write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
segplot(res,sampleIdx=i)
dev.off()
}
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
plot(res,which=i,toFile=TRUE)
dev.off()
}
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
option_list <- list(
make_option(c("--rawoutput"), dest="rawoutput"),
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--targetBed"), dest="targetBed"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
bamFile <- args
BAMFiles <- c(bamFile)
### WES Specific code
segments <- read.table(opt$targetBed, sep="\t", as.is=TRUE)
# filter the segments by the requested chromosome
segments <- segments[ segments[,1] == chromosome, ]
gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3]))
### END WES Specific code
bamDataRanges <- getSegmentReadCountsFromBAM(BAMFiles, GR=gr, mode="paired", parallel=opt$threads)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
res <- exomecn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)
write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE)
write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
segplot(res,sampleIdx=i)
dev.off()
}
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
plot(res,which=i,toFile=TRUE)
dev.off()
}
library('optparse')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load Data
#
dataTable <-read.table(opt$input, header=TRUE);
BAF<-data.frame(dataTable)
chromosomes <- levels(dataTable$Chromosome)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
png(filename = opt$output, width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
par(mar=plot_margins)
par(mgp=label_positions)
for (i in chromosomes) {
tt <- which(BAF$Chromosome==i)
if (length(tt)>0){
lBAF <-BAF[tt,]
plot(lBAF$Position,
lBAF$BAF,
ylim = c(-0.1,1.1),
xlab = paste ("position, chr",i),
ylab = "BAF",
pch = ".",
col = colors()[1])
tt <- which(lBAF$A==0.5)
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[92])
tt <- which(lBAF$A!=0.5 & lBAF$A>=0)
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[62])
tt <- 1
pres <- 1
if (length(lBAF$A)>4) {
for (j in c(2:(length(lBAF$A)-pres-1))) {
if (lBAF$A[j]==lBAF$A[j+pres]) {
tt[length(tt)+1] <- j
}
}
points(lBAF$Position[tt],lBAF$A[tt],pch = ".",col = colors()[24],cex=4)
points(lBAF$Position[tt],lBAF$B[tt],pch = ".",col = colors()[24],cex=4)
}
tt <- 1
pres <- 1
if (length(lBAF$FittedA)>4) {
for (j in c(2:(length(lBAF$FittedA)-pres-1))) {
if (lBAF$FittedA[j]==lBAF$FittedA[j+pres]) {
tt[length(tt)+1] <- j
}
}
points(lBAF$Position[tt],lBAF$FittedA[tt],pch = ".",col = colors()[463],cex=4)
points(lBAF$Position[tt],lBAF$FittedB[tt],pch = ".",col = colors()[463],cex=4)
}
}
}
dev.off()
library('optparse')
library('naturalsort')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-m", "--mappability"), dest="mappability"),
make_option(c("-p", "--ploidy"), default=2, type="integer", dest="ploidy"),
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load mappability track
#
mappabilityFile <- opt$mappability
mappabilityTrack <- read.table(mappabilityFile, header=FALSE, col.names=c("chrom", "start", "end", "score"))
mappabilityTrack$Start <- mappabilityTrack$start+1
mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom)
#
# Load Data
#
dataTable <- read.table( opt$input , header=TRUE)
input_ratio <- data.frame(dataTable)
chromosomes <- naturalsort(levels(input_ratio$Chromosome))
input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels=chromosomes, ordered=T)
sorted_ratio <- input_ratio[order(input_ratio$Chromosome),]
ratio <- merge(sorted_ratio, mappabilityTrack, sort=TRUE)
ratio <- ratio[order(ratio$Chromosome, ratio$Start),]
ploidy <- opt$ploidy
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
maxLevelToPlot <- 3
for (i in c(1:length(ratio$Ratio))) {
if (ratio$Ratio[i]>maxLevelToPlot) {
ratio$Ratio[i]=maxLevelToPlot
}
}
#
# Plot the graphs per chromosome
#
for (i in chromosomes) {
png(filename = paste(opt$output, ".", i,".png",sep=""), width = 4 * ppi, height = 2.5 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
tt <- which(ratio$Chromosome==i)
if (length(tt)>0) {
plot(ratio$Start[tt],
ratio$Ratio[tt]*ploidy,
ylim = c(0,maxLevelToPlot*ploidy),
xlab = paste ("position, chr",i),
ylab = "normalized CN",
pch = ".",
col = colors()[88])
title(outer=TRUE)