Commit cb7a051c authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'release-0.7.0' into 'master'

Release 0.7.0



See merge request !449
parents 23c1ccea bf548a42
......@@ -9,8 +9,7 @@
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
......@@ -30,19 +29,21 @@ import csv
import datetime
def main(tsvfile, vcffile):
def main(tsvfile, vcffile, samplename):
'''
:param tsvfile: filename of input file.tsv
:type tsvfile: string
:param vcffile: filename of output file.vcf
:type vcffile: string
:param samplename: Name of the sample
:type samplename: string
'''
with open(tsvfile) as reader:
# Parse file
dictreader = _parse_tsvfile(reader)
# Write out file
_format_vcffile(dictreader, vcffile)
_format_vcffile(dictreader, vcffile, samplename)
def _parse_tsvfile(readable):
'''
......@@ -92,11 +93,11 @@ _tsv_fields = ('Chr1', 'Pos1', 'Orientation1',
_vcf_fields = ('CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', 'default')
_vcf_fields = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
TS_NOW = datetime.datetime.now()
VCF_HEADER = """##fileformat=VCFv4.1
VCF_HEADER = """##fileformat=VCFv4.2
##fileDate={filedate}
##source=breakdancer-max
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
......@@ -106,6 +107,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##INFO=<ID=NOVEL,Number=0,Type=Flag,Description="Indicates a novel structural variation">
##INFO=<ID=SVEND,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
##INFO=<ID=SVMETHOD,Number=0,Type=String,Description="Program called with">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
......@@ -138,7 +140,7 @@ VCF_HEADER = """##fileformat=VCFv4.1
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">""".format( filedate=TS_NOW.strftime( "%Y%m%d" ) )
def _format_vcffile(dictreader, vcffile):
def _format_vcffile(dictreader, vcffile, samplename):
'''
Create a pseudo .vcf file based on values read from DictReader instance.
:param dictreader: DictReader instance to read data from
......@@ -148,22 +150,22 @@ def _format_vcffile(dictreader, vcffile):
'''
FORMAT = "GT:DP"
with open(vcffile, mode='w') as writer:
writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields)))
writer.write('{header}\n#{columns}\n'.format(header=VCF_HEADER, columns='\t'.join(_vcf_fields + [samplename])))
output_vcf = []
for line in dictreader:
CHROM = line['Chr1']
# TODO Figure out whether we have zero or one based positioning
POS = int(line['Pos1'])
ALT = '.'
ALT = '<{}>'.format(line['Type'])
SVEND = int(line['Pos2'])
INFO = 'PROGRAM=breakdancer;SVTYPE={}'.format(line['Type'])
INFO = 'SVMETHOD=breakdancer;SVTYPE={}'.format(line['Type'])
if line['Type'] not in ['CTX']:
INFO += ';SVLEN={}'.format(int(line['Size']))
INFO += ";SVEND={}".format(SVEND)
INFO += ";END={}".format(SVEND)
# write alternate ALT field for Intrachromosomal translocations
if line['Type'] in ['CTX']:
ALT = "N[{}:{}[".format(line['Chr2'], line['Pos2'])
......@@ -172,7 +174,7 @@ def _format_vcffile(dictreader, vcffile):
SAMPLEINFO = "{}:{}".format( '1/.', line['num_Reads'] )
# Create record
output_vcf.append([CHROM, POS, '.', '.', ALT, '.', 'PASS', INFO, FORMAT, SAMPLEINFO])
output_vcf.append([CHROM, POS, '.', 'N', ALT, '.', 'PASS', INFO, FORMAT, SAMPLEINFO])
# Sort all results
output_vcf.sort()
......@@ -184,9 +186,11 @@ def _format_vcffile(dictreader, vcffile):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--breakdancertsv', dest='breakdancertsv', type=str,
help='Breakdancer TSV outputfile')
help='Breakdancer TSV outputfile')
parser.add_argument('-o', '--outputvcf', dest='outputvcf', type=str,
help='Output vcf to')
help='Output vcf to')
parser.add_argument('-s', '--sample', dest='sample', type=str,
help='sample name')
args = parser.parse_args()
main(args.breakdancertsv, args.outputvcf)
main(args.breakdancertsv, args.outputvcf, args.sample)
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
option_list <- list(
make_option(c("--rawoutput"), dest="rawoutput"),
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
bamFile <- args
BAMFiles <- c(bamFile)
bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=1000, parallel=opt$threads)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
res <- cn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)
write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE)
write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
segplot(res,sampleIdx=i)
dev.off()
}
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
plot(res,which=i,toFile=TRUE)
dev.off()
}
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
option_list <- list(
make_option(c("--rawoutput"), dest="rawoutput"),
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--targetBed"), dest="targetBed"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
bamFile <- args
BAMFiles <- c(bamFile)
### WES Specific code
segments <- read.table(opt$targetBed, sep="\t", as.is=TRUE)
# filter the segments by the requested chromosome
segments <- segments[ segments[,1] == chromosome, ]
gr <- GRanges(segments[,1],IRanges(segments[,2],segments[,3]))
### END WES Specific code
bamDataRanges <- getSegmentReadCountsFromBAM(BAMFiles, GR=gr, mode="paired", parallel=opt$threads)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
res <- exomecn.mops(bamDataRanges)
res <- calcIntegerCopyNumbers(res)
write.table(as.data.frame(cnvs(res)), quote = FALSE, CNVoutput, row.names=FALSE)
write.table(as.data.frame(cnvr(res)), quote = FALSE, CNRoutput, row.names=FALSE)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
segplot(res,sampleIdx=i)
dev.off()
}
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
plot(res,which=i,toFile=TRUE)
dev.off()
}
library('optparse')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load Data
#
dataTable <-read.table(opt$input, header=TRUE);
BAF<-data.frame(dataTable)
chromosomes <- levels(dataTable$Chromosome)
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
png(filename = opt$output, width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
par(mar=plot_margins)
par(mgp=label_positions)
for (i in chromosomes) {
tt <- which(BAF$Chromosome==i)
if (length(tt)>0){
lBAF <-BAF[tt,]
plot(lBAF$Position,
lBAF$BAF,
ylim = c(-0.1,1.1),
xlab = paste ("position, chr",i),
ylab = "BAF",
pch = ".",
col = colors()[1])
tt <- which(lBAF$A==0.5)
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[92])
tt <- which(lBAF$A!=0.5 & lBAF$A>=0)
points(lBAF$Position[tt],lBAF$BAF[tt],pch = ".",col = colors()[62])
tt <- 1
pres <- 1
if (length(lBAF$A)>4) {
for (j in c(2:(length(lBAF$A)-pres-1))) {
if (lBAF$A[j]==lBAF$A[j+pres]) {
tt[length(tt)+1] <- j
}
}
points(lBAF$Position[tt],lBAF$A[tt],pch = ".",col = colors()[24],cex=4)
points(lBAF$Position[tt],lBAF$B[tt],pch = ".",col = colors()[24],cex=4)
}
tt <- 1
pres <- 1
if (length(lBAF$FittedA)>4) {
for (j in c(2:(length(lBAF$FittedA)-pres-1))) {
if (lBAF$FittedA[j]==lBAF$FittedA[j+pres]) {
tt[length(tt)+1] <- j
}
}
points(lBAF$Position[tt],lBAF$FittedA[tt],pch = ".",col = colors()[463],cex=4)
points(lBAF$Position[tt],lBAF$FittedB[tt],pch = ".",col = colors()[463],cex=4)
}
}
}
dev.off()
library('optparse')
library('naturalsort')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-m", "--mappability"), dest="mappability"),
make_option(c("-p", "--ploidy"), default=2, type="integer", dest="ploidy"),
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load mappability track
#
mappabilityFile <- opt$mappability
mappabilityTrack <- read.table(mappabilityFile, header=FALSE, col.names=c("chrom", "start", "end", "score"))
mappabilityTrack$Start <- mappabilityTrack$start+1
mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom)
#
# Load Data
#
dataTable <- read.table( opt$input , header=TRUE)
input_ratio <- data.frame(dataTable)
chromosomes <- naturalsort(levels(input_ratio$Chromosome))
input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels=chromosomes, ordered=T)
sorted_ratio <- input_ratio[order(input_ratio$Chromosome),]
ratio <- merge(sorted_ratio, mappabilityTrack, sort=TRUE)
ratio <- ratio[order(ratio$Chromosome, ratio$Start),]
ploidy <- opt$ploidy
ppi <- 300
plot_margins <- c(3,4,1,2)+0.1
label_positions <- c(2,0.5,0)
maxLevelToPlot <- 3
for (i in c(1:length(ratio$Ratio))) {
if (ratio$Ratio[i]>maxLevelToPlot) {
ratio$Ratio[i]=maxLevelToPlot
}
}
#
# Plot the graphs per chromosome
#
for (i in chromosomes) {
png(filename = paste(opt$output, ".", i,".png",sep=""), width = 4 * ppi, height = 2.5 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
tt <- which(ratio$Chromosome==i)
if (length(tt)>0) {
plot(ratio$Start[tt],
ratio$Ratio[tt]*ploidy,
ylim = c(0,maxLevelToPlot*ploidy),
xlab = paste ("position, chr",i),
ylab = "normalized CN",
pch = ".",
col = colors()[88])
title(outer=TRUE)
tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy )
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136])
tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4)
tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461])
tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL:
#points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4)
}
#tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT:
#points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4)
dev.off()
}
png(filename = paste(opt$output, ".png",sep=""), width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
par(mar=plot_margins)
par(mgp=label_positions)
for (i in chromosomes) {
tt <- which(ratio$Chromosome==i)
if (length(tt)>0) {
plot(ratio$Start[tt],
ratio$Ratio[tt]*ploidy,
ylim = c(0,maxLevelToPlot*ploidy),
xlab = paste ("position, chr",i),
ylab = "normalized CN",
pch = ".",
col = colors()[88])
tt <- which(ratio$Chromosome==i & ratio$CopyNumber>ploidy )
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136])
tt <- which(ratio$Chromosome==i & ratio$Ratio==maxLevelToPlot & ratio$CopyNumber>ploidy)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4)
tt <- which(ratio$Chromosome==i & ratio$CopyNumber<ploidy & ratio$CopyNumber!= -1)
points(ratio$Start[tt],ratio$Ratio[tt]*ploidy,pch = ".",col = colors()[461])
tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE PREDICTED COPY NUMBER LEVEL:
#points(ratio$Start[tt],ratio$CopyNumber[tt], pch = ".", col = colors()[24],cex=4)
}
#tt <- which(ratio$Chromosome==i)
#UNCOMMENT HERE TO SEE THE EVALUATED MEDIAN LEVEL PER SEGMENT:
#points(ratio$Start[tt],ratio$MedianRatio[tt]*ploidy, pch = ".", col = colors()[463],cex=4)
}
dev.off()
# Export the whole genome graph
png(filename = paste(opt$output, ".wg.png",sep=""), width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
plot_margins <- c(3,4,2,2)+0.1
label_positions <- c(2,0.5,0)
par(mfrow = c(1,1))
par(mar=plot_margins)
par(mgp=label_positions)
par(xaxs="i", yaxs="i")
maxLevelToPlot <- 3
for (i in c(1:length(ratio$Ratio))) {
if (ratio$Ratio[i]>maxLevelToPlot) {
ratio$Ratio[i]=maxLevelToPlot
}
}
for (i in c(1:length(ratio$Start))) {
ratio$Position[i] = (i-1) *5000 +1
}
plotRatioLT <- 0.10
filteredSet <- ratio[ ratio$score > plotRatioLT, ]
plot(filteredSet$Position,
filteredSet$Ratio*ploidy,
ylim = c(0,maxLevelToPlot*ploidy),
xlab = paste ("Chr. on genome"),
ylab = "normalized CN",
pch = ".",
col = colors()[88])
title(outer=TRUE)
tt <- which(filteredSet$CopyNumber>ploidy)
points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[136])
tt <- which(filteredSet$Ratio==maxLevelToPlot & filteredSet$CopyNumber>ploidy)
points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[136],cex=4)
tt <- which(filteredSet$CopyNumber<ploidy & filteredSet$CopyNumber!= -1)
points(filteredSet$Position[tt],filteredSet$Ratio[tt]*ploidy,pch = ".",col = colors()[461], bg="black")
for (chrom in chromosomes) {