Commit d12ce44a authored by Sander Bollen's avatar Sander Bollen
Browse files

Merge branch 'develop' into feature-vcfwithvcf-number

parents 28b88048 7ccf5ce0
......@@ -166,11 +166,15 @@ class BamMetrics(val root: Configurable) extends QScript
addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
val sorter = new BedtoolsSort(this)
sorter.input = intervals.bed
sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
add(sorter)
val bedCov = BedtoolsCoverage(this, sorter.output, inputBam, depth = true)
val sortedBed = BamMetrics.sortedbedCache.getOrElse(intervals.bed, {
val sorter = new BedtoolsSort(this)
sorter.input = intervals.bed
sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
add(sorter)
BamMetrics.sortedbedCache += intervals.bed -> sorter.output
sorter.output
})
val bedCov = BedtoolsCoverage(this, sortedBed, inputBam, depth = true)
val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
covStats.title = Some("Coverage Plot")
covStats.subTitle = Some(s"for file '$targetName.bed'")
......@@ -198,4 +202,6 @@ object BamMetrics extends PipelineCommand {
bamMetrics.biopetScript()
bamMetrics
}
private var sortedbedCache: Map[File, File] = Map()
}
......@@ -81,7 +81,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
this match {
case r: Reference =>
if (r.dictRequired) deps :+= r.referenceDict
if (r.dictRequired) deps :+= r.referenceDictFile
if (r.faiRequired) deps :+= r.referenceFai
deps = deps.distinct
case _ =>
......
......@@ -40,16 +40,22 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
def defaultResidentFactor: Double = 1.2
var vmemFactor: Double = config("vmem_factor", default = defaultVmemFactor)
val useSge: Boolean = config("use_sge", default = true)
var residentFactor: Double = config("resident_factor", default = defaultResidentFactor)
private var _coreMemory: Double = 2.0
def coreMemory = _coreMemory
/** This value is for SGE and is defined in seconds */
protected val maxWalltimeLimit: Option[Int] = config("max_walltime_limit")
var retry = 0
override def freezeFieldValues(): Unit = {
setResources()
if (vmem.isDefined) jobResourceRequests :+= "h_vmem=" + vmem.get
if (useSge && vmem.isDefined) jobResourceRequests :+= s"h_vmem=${vmem.get}"
if (useSge && maxWalltimeLimit.isDefined) jobResourceRequests :+= s"h_rt=${maxWalltimeLimit.get}"
super.freezeFieldValues()
}
......
......@@ -17,9 +17,9 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import htsjdk.samtools.reference.IndexedFastaSequenceFile
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, Summarizable }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, FastaUtils, Logging }
import scala.collection.JavaConversions._
......@@ -49,6 +49,8 @@ trait Reference extends Configurable {
}
}
def referenceDict = FastaUtils.getCachedDict(referenceFasta())
/** All config values will get a prefix */
override def subPath = {
referenceConfigPath ::: super.subPath
......@@ -66,7 +68,7 @@ trait Reference extends Configurable {
def dictRequired = this.isInstanceOf[Summarizable] || this.isInstanceOf[SummaryQScript]
/** Returns the dict file belonging to the fasta file */
def referenceDict = new File(referenceFasta().getAbsolutePath
def referenceDictFile = new File(referenceFasta().getAbsolutePath
.stripSuffix(".fa")
.stripSuffix(".fasta")
.stripSuffix(".fna") + ".dict")
......@@ -108,9 +110,8 @@ trait Reference extends Configurable {
/** Create summary part for reference */
def referenceSummary: Map[String, Any] = {
Reference.requireDict(referenceFasta())
val file = new IndexedFastaSequenceFile(referenceFasta())
Map("contigs" ->
(for (seq <- file.getSequenceDictionary.getSequences) yield seq.getSequenceName -> {
(for (seq <- referenceDict.getSequences) yield seq.getSequenceName -> {
val md5 = Option(seq.getAttribute("M5"))
Map("md5" -> md5, "length" -> seq.getSequenceLength)
}).toMap,
......
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
suppressWarnings(suppressPackageStartupMessages(library('cn.mops')))
suppressWarnings(suppressPackageStartupMessages(library('optparse')))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
......@@ -10,7 +10,9 @@ option_list <- list(
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
make_option(c("--threads"), dest="threads", default=8, type="integer"),
make_option(c("--wl"), dest="wl", default=1000, type="integer"),
make_option(c("--version"), action="store_true", default=FALSE)
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
......@@ -18,13 +20,32 @@ arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
if (opt$version) {
cat(toString(packageVersion('cn.mops')))
quit("no")
}
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
windowLength <- opt$wl
bamFile <- args
BAMFiles <- c(bamFile)
bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=1000, parallel=opt$threads)
BAMFiles <- strsplit(c(bamFile), " ")[[1]]
bamDataRanges <- tryCatch(
{
getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=windowLength, parallel=opt$threads)
},
error = function(e) {
quit("no")
},
warning = function(w) {
quit("no")
},
finally = {
}
)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
......@@ -43,7 +64,7 @@ dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
png(file=paste(dirname(opt$rawoutput),"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
......@@ -56,7 +77,7 @@ for ( i in 1:length(BAMFiles)){
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
png(file=paste(dirname(opt$rawoutput),"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
......
......@@ -4,7 +4,6 @@ library('naturalsort')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-m", "--mappability"), dest="mappability"),
make_option(c("-p", "--ploidy"), default=2, type="integer", dest="ploidy"),
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
......@@ -14,17 +13,6 @@ parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load mappability track
#
mappabilityFile <- opt$mappability
mappabilityTrack <- read.table(mappabilityFile, header=FALSE, col.names=c("chrom", "start", "end", "score"))
mappabilityTrack$Start <- mappabilityTrack$start+1
mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom)
#
# Load Data
#
......@@ -37,9 +25,7 @@ chromosomes <- naturalsort(levels(input_ratio$Chromosome))
input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels=chromosomes, ordered=T)
sorted_ratio <- input_ratio[order(input_ratio$Chromosome),]
ratio <- merge(sorted_ratio, mappabilityTrack, sort=TRUE)
ratio <- ratio[order(ratio$Chromosome, ratio$Start),]
ratio <- input_ratio[order(input_ratio$Chromosome, input_ratio$Start),]
ploidy <- opt$ploidy
ppi <- 300
......@@ -99,15 +85,6 @@ for (i in chromosomes) {
dev.off()
}
png(filename = paste(opt$output, ".png",sep=""), width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
......@@ -151,7 +128,7 @@ dev.off()
# Export the whole genome graph
png(filename = paste(opt$output, ".wg.png",sep=""), width = 16 * ppi, height = 10 * ppi,
png(filename = paste(opt$output, ".wg.png",sep=""), width = 50 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
plot_margins <- c(3,4,2,2)+0.1
......
import argparse
from os.path import join
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
class BamRatioReader(object):
"""
Reader object for bam_ratio.txt files
"""
def __init__(self, filename):
self.filename = filename
self.__handle = open(filename)
def __reset_handle(self):
self.__handle.close()
self.__handle = open(self.filename)
def get_chromosome(self, chromosome):
self.__reset_handle()
lines = []
for line in self.__handle:
if line.split("\t")[0] == chromosome:
lines.append(line)
return lines
@property
def chromosomes(self):
self.__reset_handle()
chrs = []
for x in self.__handle:
if "Chromosome" not in x:
chrs.append(x.split("\t")[0])
return list(set(chrs))
def get_cn_lines(lines, cn_type="diploid" ,ploidy=2):
"""
For a list if lines, return those lines belonging to a certain cn_typle
:param lines: list of lines
:param cn_type: either "normal" (cn= ploidy), "loss" (cn < ploidy) or "gain" (cn > ploidy)
:return: list of lines
"""
n_lines = []
for line in lines:
cn = line.strip().split("\t")[-1]
if cn_type == "normal" and int(cn) == ploidy:
n_lines.append(line)
elif cn_type == "loss" and int(cn) < ploidy:
n_lines.append(line)
elif cn_type == "gain" and int(cn) > ploidy:
n_lines.append(line)
return n_lines
def lines_to_array(lines, ploidy=2):
"""
Convert list of lines to numpy array of [start, ratio]
"""
tmp = []
for x in lines:
start = x.split("\t")[1]
ratio = x.split("\t")[2]
tmp.append([int(start), float(ratio)*ploidy])
return np.array(tmp)
def plot_chromosome(lines, chromosome, output_file, ploidy):
"""
Plot lines belonging to a chromosome
green = where CN = ploidy
red = where CN > ploidy
blue = where CN < ploidy
"""
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
normals = lines_to_array(get_cn_lines(lines, "normal", ploidy), ploidy)
losses = lines_to_array(get_cn_lines(lines, "loss", ploidy), ploidy)
gains = lines_to_array(get_cn_lines(lines, "gain", ploidy), ploidy)
print("Plotting chromosome {0}".format(chromosome))
all_x = []
if len(normals) > 0:
ax.scatter(normals[:, 0], normals[:,1], color="g")
for x in normals[:, 0]:
all_x += [x]
if len(losses) > 0:
ax.scatter(losses[:, 0], losses[:,1], color="b")
for x in losses[:, 0]:
all_x += [x]
if len(gains) > 0:
for x in gains[:, 0]:
all_x += [x]
ax.scatter(gains[:,0], gains[:,1], color="r")
ax.set_ylim(0, ploidy*3)
ax.set_xlim(int(0-(max(all_x)*0.1)), int(max(all_x)+(max(all_x)*0.1)))
ax.set_xlabel("chromosome position")
ax.set_ylabel("CN")
ax.set_title("Chromosome {0}".format(chromosome))
plt.savefig(output_file)
plt.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-I', "--input", required=True, help="Input bam_ratio.txt file")
parser.add_argument('-O', '--output-prefix', required=True, help="Path to output prefix")
parser.add_argument("-p", "--ploidy", type=int, default=2, help="Ploidy of sample")
args = parser.parse_args()
reader = BamRatioReader(args.input)
for chromosome in reader.chromosomes:
ofile = args.output_prefix + "." + "chr{0}.png".format(chromosome)
lines = reader.get_chromosome(chromosome)
plot_chromosome(lines, chromosome, ofile, args.ploidy)
\ No newline at end of file
......@@ -16,43 +16,102 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.Version
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.commandline._
import nl.lumc.sasc.biopet.utils.getSemanticVersion
/**
* Wrapper for the Cnmops command line tool.
* Written based on Cnmops version v2.2.1.
*/
class Cnmops(val root: Configurable) extends RscriptCommandLineFunction {
class Cnmops(val root: Configurable) extends RscriptCommandLineFunction with Version {
override def defaultThreads = 4
override def defaultCoreMemory: Double = 4.0
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/cnmops.R")
def versionCommand = {
val v = super.cmdLine + "--version"
v.trim.replace("'", "")
}
def versionRegex = "(\\d+\\.\\d+\\.\\d+)".r
private def stringToInt(s: String): Option[Int] = {
try {
Some(s.toInt)
} catch {
case e: Exception => None
}
}
/**
* Check whether version of cn mops is at least 1.18.0
*
* @return
*/
def versionCheck: Boolean = {
getVersion.flatMap(getSemanticVersion(_)) match {
case Some(version) => (version.major == 1 && version.minor >= 18) || version.major >= 2
case _ => false
}
}
@Input(doc = "Input file BAM", required = true)
var input: List[File] = List()
@Argument(doc = "Chromsomosome to query", required = true)
var chromosome: String = _
@Argument(doc = "Window length", required = false)
var windowLength: Int = config("window_length", namespace = "kopisu", default = 1000)
// output files, computed automatically from output directory
@Output(doc = "Output CNV file")
private lazy val outputCnv: File = {
require(outputDir == null, "Unexpected error when trying to set cn.MOPS CNV output")
new File(outputDir, "cnv.txt")
lazy val outputCnv: File = {
outputDir match {
case Some(dir) => new File(dir, "cnv.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS CNV output")
}
}
@Output(doc = "Output CNR file")
private lazy val outputCnr: File = {
require(outputDir == null, "Unexpected error when trying to set cn.MOPS CNR output")
new File(outputDir, "cnr.txt")
lazy val outputCnr: File = {
outputDir match {
case Some(dir) => new File(dir, "cnr.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS CNR output")
}
}
@Output(doc = "Raw output")
lazy val rawOutput: File = {
outputDir match {
case Some(dir) => new File(dir, "rawoutput.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS raw output")
}
}
/** write all output files to this directory [./] */
var outputDir: String = _
var outputDir: Option[File] = None
override def beforeGraph = {
super.beforeGraph
require(!outputDir.isEmpty, "Outputdir for cn.MOPS should not be empty")
require(outputDir.isDefined, "Outputdir for cn.MOPS should not be empty")
require(input.length >= 2, "Please supply at least 2 BAM files for cn.MOPS")
if (!versionCheck) {
logger.warn("cn.mops version is below 1.18.0. Contigs containing little to no reads WILL fail")
}
}
override def cmdLine = super.cmdLine +
required(input.foreach(f => f.getAbsolutePath).toString.mkString(" "))
required("--cnr", outputCnr) +
required("--cnv", outputCnv) +
required("--chr", chromosome) +
required("--rawoutput", rawOutput) +
required("--threads", threads) +
optional("--wl", windowLength) +
repeat(input)
}
......@@ -29,7 +29,6 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input file", required = true)
var input: List[File] = Nil
@Argument(doc = "Set flanking amount")
var flanking: Option[Int] = config("flanking")
var exclusionRegions: List[File] = config("exclusion_regions", default = Nil)
......
......@@ -22,6 +22,8 @@ import org.broadinstitute.gatk.utils.commandline._
class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
override def defaults = Map("max_walltime_limit" -> 7200)
@Input(doc = "BAMfile", required = true)
var input: File = _
......@@ -125,7 +127,7 @@ class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Refer
override def versionCommand = executable
override def versionRegex = """Control-FREEC v([0-9\.]+) : .*""".r
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
override def defaultCoreMemory = 50
private var configFile: File = _
......
......@@ -16,12 +16,12 @@ package nl.lumc.sasc.biopet.extensions.freec
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.core.extensions.{ PythonCommandLineFunction, RscriptCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction {
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R")
class FreeCCNVPlot(val root: Configurable) extends PythonCommandLineFunction {
setPythonScript("freec_CNVPlot.py")
@Input(doc = "Output file from FreeC. *_CNV", required = true)
var input: File = null
......@@ -29,12 +29,7 @@ class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction {
@Output(doc = "Destination for the PNG file", required = true)
var output: File = null
/**
* cmdLine to execute R-script and with arguments
* Arguments should be pasted in the same order as the script is expecting it.
* Unless some R library is used for named arguments
*/
override def cmdLine = super.cmdLine +
required("-i", input) +
required("-o", output)
override def cmdLine = getPythonCommand +
required("-I", input) +
required("-O", output)
}
......@@ -37,7 +37,7 @@ class SortVcf(val root: Configurable) extends Picard with Reference {
override def beforeGraph(): Unit = {
super.beforeGraph()
if (sequenceDictionary == null) sequenceDictionary = referenceDict
if (sequenceDictionary == null) sequenceDictionary = referenceDictFile
}
/** Returns command to execute */
......
package nl.lumc.sasc.biopet.extensions
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by Sander Bollen on 17-10-16.
*/
class CnmopsTest extends TestNGSuite with Matchers {
@Test
def testVersionCommand() = {