Commit b6382747 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'add-cnmops-kopisu' into 'develop'

Adding CNmops as method to Kopisu

Depends on #369 and #370 

See merge request !418
parents 3c836e0c b8830d55
......@@ -18,7 +18,7 @@ import java.io.File
import htsjdk.samtools.reference.IndexedFastaSequenceFile
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.config.{Config, Configurable}
import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, FastaUtils, Logging }
import scala.collection.JavaConversions._
......
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
suppressWarnings(suppressPackageStartupMessages(library('cn.mops')))
suppressWarnings(suppressPackageStartupMessages(library('optparse')))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
......@@ -10,7 +10,9 @@ option_list <- list(
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
make_option(c("--threads"), dest="threads", default=8, type="integer"),
make_option(c("--wl"), dest="wl", default=1000, type="integer"),
make_option(c("--version"), action="store_true", default=FALSE)
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
......@@ -18,13 +20,32 @@ arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
if (opt$version) {
cat(toString(packageVersion('cn.mops')))
quit("no")
}
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
windowLength <- opt$wl
bamFile <- args
BAMFiles <- c(bamFile)
bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=1000, parallel=opt$threads)
BAMFiles <- strsplit(c(bamFile), " ")[[1]]
bamDataRanges <- tryCatch(
{
getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=windowLength, parallel=opt$threads)
},
error = function(e) {
quit("no")
},
warning = function(w) {
quit("no")
},
finally = {
}
)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
......@@ -43,7 +64,7 @@ dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
png(file=paste(dirname(opt$rawoutput),"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
......@@ -56,7 +77,7 @@ for ( i in 1:length(BAMFiles)){
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
png(file=paste(dirname(opt$rawoutput),"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
......
......@@ -4,7 +4,6 @@ library('naturalsort')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-m", "--mappability"), dest="mappability"),
make_option(c("-p", "--ploidy"), default=2, type="integer", dest="ploidy"),
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
......@@ -14,17 +13,6 @@ parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load mappability track
#
mappabilityFile <- opt$mappability
mappabilityTrack <- read.table(mappabilityFile, header=FALSE, col.names=c("chrom", "start", "end", "score"))
mappabilityTrack$Start <- mappabilityTrack$start+1
mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom)
#
# Load Data
#
......@@ -37,9 +25,7 @@ chromosomes <- naturalsort(levels(input_ratio$Chromosome))
input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels=chromosomes, ordered=T)
sorted_ratio <- input_ratio[order(input_ratio$Chromosome),]
ratio <- merge(sorted_ratio, mappabilityTrack, sort=TRUE)
ratio <- ratio[order(ratio$Chromosome, ratio$Start),]
ratio <- input_ratio[order(input_ratio$Chromosome, input_ratio$Start),]
ploidy <- opt$ploidy
ppi <- 300
......@@ -99,15 +85,6 @@ for (i in chromosomes) {
dev.off()
}
png(filename = paste(opt$output, ".png",sep=""), width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
......@@ -151,7 +128,7 @@ dev.off()
# Export the whole genome graph
png(filename = paste(opt$output, ".wg.png",sep=""), width = 16 * ppi, height = 10 * ppi,
png(filename = paste(opt$output, ".wg.png",sep=""), width = 50 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
plot_margins <- c(3,4,2,2)+0.1
......
import argparse
from os.path import join
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
class BamRatioReader(object):
"""
Reader object for bam_ratio.txt files
"""
def __init__(self, filename):
self.filename = filename
self.__handle = open(filename)
def __reset_handle(self):
self.__handle.close()
self.__handle = open(self.filename)
def get_chromosome(self, chromosome):
self.__reset_handle()
lines = []
for line in self.__handle:
if line.split("\t")[0] == chromosome:
lines.append(line)
return lines
@property
def chromosomes(self):
self.__reset_handle()
chrs = []
for x in self.__handle:
if "Chromosome" not in x:
chrs.append(x.split("\t")[0])
return list(set(chrs))
def get_cn_lines(lines, cn_type="diploid" ,ploidy=2):
"""
For a list if lines, return those lines belonging to a certain cn_typle
:param lines: list of lines
:param cn_type: either "normal" (cn= ploidy), "loss" (cn < ploidy) or "gain" (cn > ploidy)
:return: list of lines
"""
n_lines = []
for line in lines:
cn = line.strip().split("\t")[-1]
if cn_type == "normal" and int(cn) == ploidy:
n_lines.append(line)
elif cn_type == "loss" and int(cn) < ploidy:
n_lines.append(line)
elif cn_type == "gain" and int(cn) > ploidy:
n_lines.append(line)
return n_lines
def lines_to_array(lines, ploidy=2):
"""
Convert list of lines to numpy array of [start, ratio]
"""
tmp = []
for x in lines:
start = x.split("\t")[1]
ratio = x.split("\t")[2]
tmp.append([int(start), float(ratio)*ploidy])
return np.array(tmp)
def plot_chromosome(lines, chromosome, output_file, ploidy):
"""
Plot lines belonging to a chromosome
green = where CN = ploidy
red = where CN > ploidy
blue = where CN < ploidy
"""
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
normals = lines_to_array(get_cn_lines(lines, "normal", ploidy), ploidy)
losses = lines_to_array(get_cn_lines(lines, "loss", ploidy), ploidy)
gains = lines_to_array(get_cn_lines(lines, "gain", ploidy), ploidy)
print("Plotting chromosome {0}".format(chromosome))
all_x = []
if len(normals) > 0:
ax.scatter(normals[:, 0], normals[:,1], color="g")
for x in normals[:, 0]:
all_x += [x]
if len(losses) > 0:
ax.scatter(losses[:, 0], losses[:,1], color="b")
for x in losses[:, 0]:
all_x += [x]
if len(gains) > 0:
for x in gains[:, 0]:
all_x += [x]
ax.scatter(gains[:,0], gains[:,1], color="r")
ax.set_ylim(0, ploidy*3)
ax.set_xlim(int(0-(max(all_x)*0.1)), int(max(all_x)+(max(all_x)*0.1)))
ax.set_xlabel("chromosome position")
ax.set_ylabel("CN")
ax.set_title("Chromosome {0}".format(chromosome))
plt.savefig(output_file)
plt.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-I', "--input", required=True, help="Input bam_ratio.txt file")
parser.add_argument('-O', '--output-prefix', required=True, help="Path to output prefix")
parser.add_argument("-p", "--ploidy", type=int, default=2, help="Ploidy of sample")
args = parser.parse_args()
reader = BamRatioReader(args.input)
for chromosome in reader.chromosomes:
ofile = args.output_prefix + "." + "chr{0}.png".format(chromosome)
lines = reader.get_chromosome(chromosome)
plot_chromosome(lines, chromosome, ofile, args.ploidy)
\ No newline at end of file
......@@ -16,43 +16,102 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.Version
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.commandline._
import nl.lumc.sasc.biopet.utils.getSemanticVersion
/**
* Wrapper for the Cnmops command line tool.
* Written based on Cnmops version v2.2.1.
*/
class Cnmops(val root: Configurable) extends RscriptCommandLineFunction {
class Cnmops(val root: Configurable) extends RscriptCommandLineFunction with Version {
override def defaultThreads = 4
override def defaultCoreMemory: Double = 4.0
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/cnmops.R")
def versionCommand = {
val v = super.cmdLine + "--version"
v.trim.replace("'", "")
}
def versionRegex = "(\\d+\\.\\d+\\.\\d+)".r
private def stringToInt(s: String): Option[Int] = {
try {
Some(s.toInt)
} catch {
case e: Exception => None
}
}
/**
* Check whether version of cn mops is at least 1.18.0
*
* @return
*/
def versionCheck: Boolean = {
getVersion.flatMap(getSemanticVersion(_)) match {
case Some(version) => (version.major == 1 && version.minor >= 18) || version.major >= 2
case _ => false
}
}
@Input(doc = "Input file BAM", required = true)
var input: List[File] = List()
@Argument(doc = "Chromsomosome to query", required = true)
var chromosome: String = _
@Argument(doc = "Window length", required = false)
var windowLength: Int = config("window_length", namespace = "kopisu", default = 1000)
// output files, computed automatically from output directory
@Output(doc = "Output CNV file")
private lazy val outputCnv: File = {
require(outputDir == null, "Unexpected error when trying to set cn.MOPS CNV output")
new File(outputDir, "cnv.txt")
lazy val outputCnv: File = {
outputDir match {
case Some(dir) => new File(dir, "cnv.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS CNV output")
}
}
@Output(doc = "Output CNR file")
private lazy val outputCnr: File = {
require(outputDir == null, "Unexpected error when trying to set cn.MOPS CNR output")
new File(outputDir, "cnr.txt")
lazy val outputCnr: File = {
outputDir match {
case Some(dir) => new File(dir, "cnr.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS CNR output")
}
}
@Output(doc = "Raw output")
lazy val rawOutput: File = {
outputDir match {
case Some(dir) => new File(dir, "rawoutput.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS raw output")
}
}
/** write all output files to this directory [./] */
var outputDir: String = _
var outputDir: Option[File] = None
override def beforeGraph = {
super.beforeGraph
require(!outputDir.isEmpty, "Outputdir for cn.MOPS should not be empty")
require(outputDir.isDefined, "Outputdir for cn.MOPS should not be empty")
require(input.length >= 2, "Please supply at least 2 BAM files for cn.MOPS")
if (!versionCheck) {
logger.warn("cn.mops version is below 1.18.0. Contigs containing little to no reads WILL fail")
}
}
override def cmdLine = super.cmdLine +
required(input.foreach(f => f.getAbsolutePath).toString.mkString(" "))
required("--cnr", outputCnr) +
required("--cnv", outputCnv) +
required("--chr", chromosome) +
required("--rawoutput", rawOutput) +
required("--threads", threads) +
optional("--wl", windowLength) +
repeat(input)
}
......@@ -22,6 +22,8 @@ import org.broadinstitute.gatk.utils.commandline._
class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
override def defaults = Map("max_walltime_limit" -> 7200)
@Input(doc = "BAMfile", required = true)
var input: File = _
......@@ -125,7 +127,7 @@ class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Refer
override def versionCommand = executable
override def versionRegex = """Control-FREEC v([0-9\.]+) : .*""".r
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
override def defaultCoreMemory = 50
private var configFile: File = _
......
......@@ -16,12 +16,12 @@ package nl.lumc.sasc.biopet.extensions.freec
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.core.extensions.{ PythonCommandLineFunction, RscriptCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction {
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R")
class FreeCCNVPlot(val root: Configurable) extends PythonCommandLineFunction {
setPythonScript("freec_CNVPlot.py")
@Input(doc = "Output file from FreeC. *_CNV", required = true)
var input: File = null
......@@ -29,12 +29,7 @@ class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction {
@Output(doc = "Destination for the PNG file", required = true)
var output: File = null
/**
* cmdLine to execute R-script and with arguments
* Arguments should be pasted in the same order as the script is expecting it.
* Unless some R library is used for named arguments
*/
override def cmdLine = super.cmdLine +
required("-i", input) +
required("-o", output)
override def cmdLine = getPythonCommand +
required("-I", input) +
required("-O", output)
}
package nl.lumc.sasc.biopet.extensions
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by Sander Bollen on 17-10-16.
*/
class CnmopsTest extends TestNGSuite with Matchers {
@Test
def testVersionCommand() = {
val cn = new Cnmops(null)
cn.versionCommand.endsWith("--version") shouldBe true
cn.versionCommand.split(" ").head.endsWith("Rscript") shouldBe true
}
}
......@@ -16,7 +16,7 @@ package nl.lumc.sasc.biopet.pipelines.kopisu
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
import nl.lumc.sasc.biopet.pipelines.kopisu.methods.{ ConiferMethod, FreecMethod }
import nl.lumc.sasc.biopet.pipelines.kopisu.methods.{ CnmopsMethod, ConiferMethod, FreecMethod }
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......@@ -45,9 +45,13 @@ class Kopisu(val root: Configurable) extends QScript with SummaryQScript with Re
Some(new ConiferMethod(this))
} else None
lazy val cnMopsMethod = if (config("use_cnmops_method", default = false)) {
Some(new CnmopsMethod(this))
} else None
// This script is in fact FreeC only.
def biopetScript() {
if (freecMethod.isEmpty && coniferMethod.isEmpty) Logging.addError("No method selected")
if (freecMethod.isEmpty && coniferMethod.isEmpty && cnMopsMethod.isEmpty) Logging.addError("No CNV method selected")
freecMethod.foreach { method =>
method.inputBams = inputBams
......@@ -61,13 +65,21 @@ class Kopisu(val root: Configurable) extends QScript with SummaryQScript with Re
add(method)
}
cnMopsMethod.foreach { method =>
method.inputBams = inputBams
method.outputDir = new File(outputDir, "cnmops_method")
add(method)
}
addSummaryJobs()
}
/** Must return a map with used settings for this pipeline */
def summarySettings: Map[String, Any] = Map(
"reference" -> referenceSummary,
"freec_method" -> freecMethod.isDefined
"freec_method" -> freecMethod.isDefined,
"conifer_method" -> coniferMethod.isDefined,
"cnmops_method" -> cnMopsMethod.isDefined
)
/** File to put in the summary for thie pipeline */
......
package nl.lumc.sasc.biopet.pipelines.kopisu.methods
import htsjdk.samtools.{ SAMSequenceDictionary, SamReaderFactory }
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.extensions.Cnmops
import nl.lumc.sasc.biopet.utils.config.Configurable
import scala.collection.JavaConversions._
/**
* Created by wyleung on 2-6-16.
*/
class CnmopsMethod(val root: Configurable) extends CnvMethod with Reference {
def name = "cnmops"
def biopetScript: Unit = {
// we repeat running cnmops for all chromosomes
val cnmopsJobs = referenceDict.getSequences.map(contig => {
val cnmops = new Cnmops(this)
cnmops.chromosome = contig.getSequenceName
cnmops.input = inputBams.flatMap {
case (sampleName, bamFile) => Some(bamFile)
case _ => None
}.toList
cnmops.outputDir = Some(new File(outputDir, contig.getSequenceName))
cnmops.beforeGraph
cnmops
}).toList
addAll(cnmopsJobs)
// adding output files to the outputSummary
cnmopsJobs.foreach(job => {
addOutput(job.chromosome, job.rawOutput)
})
addSummaryJobs()
}
}
......@@ -37,7 +37,15 @@ trait CnvMethod extends QScript with SummaryQScript with Reference {
def summarySettings: Map[String, Any] = Map()
/** File to put in the summary for thie pipeline */
def summaryFiles: Map[String, File] = inputBams.map(x => s"inputbam_${x._1}" -> x._2)
def summaryFiles: Map[String, File] = inputBams.map(x => s"inputbam_${x._1}" -> x._2) ++ cnvOutputFiles
def init() = {}
protected var cnvOutputFiles: Map[String, File] = Map.empty
def getCnvOutputFiles: Map[String, File] = cnvOutputFiles
protected def addOutput(sample: String, outputFile: File) = {
cnvOutputFiles += (sample -> outputFile)
}
}
......@@ -67,6 +67,7 @@ class ConiferMethod(val root: Configurable) extends CnvMethod {
coniferCall.input = coniferAnalyze.output
coniferCall.output = new File(sampleDir, s"${sampleName}.calls.txt")
add(coniferCall)