Commit 475293ea authored by Peter van 't Hof's avatar Peter van 't Hof

Merge remote-tracking branch 'remotes/origin/develop' into feature_annotation_versions

# Conflicts:
#	biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala
parents d8ffa403 7fd9c6c6
......@@ -38,6 +38,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
else new File(".")
}
require(outputDir.canRead, s"No premision to read outputdir: $outputDir")
require(outputDir.canWrite, s"No premision to read outputdir: $outputDir")
@Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
var disableScatter: Boolean = false
......@@ -67,6 +69,9 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
final def script() {
outputDir = config("output_dir")
outputDir = outputDir.getAbsoluteFile
BiopetQScript.checkOutputDir(outputDir)
init()
biopetScript()
logger.info("Biopet script done")
......@@ -153,4 +158,13 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
object BiopetQScript {
case class InputFile(file: File, md5: Option[String] = None)
def checkOutputDir(outputDir: File): Unit = {
// Sanity checks
require(outputDir.getParentFile.canRead, s"No premision to read parent of outputdir: ${outputDir.getParentFile}")
require(outputDir.getParentFile.canWrite, s"No premision to write parent of outputdir: ${outputDir.getParentFile}")
outputDir.mkdir()
require(outputDir.canRead, s"No premision to read outputdir: $outputDir")
require(outputDir.canWrite, s"No premision to write outputdir: $outputDir")
}
}
......@@ -40,16 +40,22 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
def defaultResidentFactor: Double = 1.2
var vmemFactor: Double = config("vmem_factor", default = defaultVmemFactor)
val useSge: Boolean = config("use_sge", default = true)
var residentFactor: Double = config("resident_factor", default = defaultResidentFactor)
private var _coreMemory: Double = 2.0
def coreMemory = _coreMemory
/** This value is for SGE and is defined in seconds */
protected val maxWalltimeLimit: Option[Int] = config("max_walltime_limit")
var retry = 0
override def freezeFieldValues(): Unit = {
setResources()
if (vmem.isDefined) jobResourceRequests :+= "h_vmem=" + vmem.get
if (useSge && vmem.isDefined) jobResourceRequests :+= s"h_vmem=${vmem.get}"
if (useSge && maxWalltimeLimit.isDefined) jobResourceRequests :+= s"h_rt=${maxWalltimeLimit.get}"
super.freezeFieldValues()
}
......
......@@ -77,6 +77,7 @@ trait PipelineCommand extends MainCommand with GatkLogging with ImplicitConversi
val pipelineName = this.getClass.getSimpleName.toLowerCase.split("""\$""").head
val pipelineConfig = globalConfig.map.getOrElse(pipelineName, Map()).asInstanceOf[Map[String, Any]]
val pipelineOutputDir = new File(globalConfig.map.getOrElse("output_dir", pipelineConfig.getOrElse("output_dir", "./")).toString)
BiopetQScript.checkOutputDir(pipelineOutputDir)
val logDir: File = new File(pipelineOutputDir, ".log")
logDir.mkdirs()
new File(logDir, "biopet." + BiopetQCommandLine.timestamp + ".log")
......
......@@ -16,11 +16,11 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import htsjdk.samtools.SAMSequenceDictionary
import htsjdk.samtools.reference.{ FastaSequenceFile, IndexedFastaSequenceFile }
import htsjdk.samtools.reference.IndexedFastaSequenceFile
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.{ LazyCheck, BamUtils, ConfigUtils, FastaUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, FastaUtils, Logging }
import scala.collection.JavaConversions._
......
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library('cn.mops'))
suppressPackageStartupMessages(library('optparse'))
suppressWarnings(suppressPackageStartupMessages(library('cn.mops')))
suppressWarnings(suppressPackageStartupMessages(library('optparse')))
# Script from https://git.lumc.nl/lgtc-bioinformatics/gapss3/blob/master/src/CNV/makeCnmops.sh
# modified to take arguments
......@@ -10,7 +10,9 @@ option_list <- list(
make_option(c("--cnv"), dest="cnv"),
make_option(c("--cnr"), dest="cnr"),
make_option(c("--chr"), dest="chr"),
make_option(c("--threads"), dest="threads", default=8, type="integer")
make_option(c("--threads"), dest="threads", default=8, type="integer"),
make_option(c("--wl"), dest="wl", default=1000, type="integer"),
make_option(c("--version"), action="store_true", default=FALSE)
)
parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
......@@ -18,13 +20,32 @@ arguments = parse_args(parser, positional_arguments=TRUE)
opt = arguments$options
args = arguments$args
if (opt$version) {
cat(toString(packageVersion('cn.mops')))
quit("no")
}
chromosome <- opt$chr
CNVoutput <- opt$cnv
CNRoutput <- opt$cnr
windowLength <- opt$wl
bamFile <- args
BAMFiles <- c(bamFile)
bamDataRanges <- getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=1000, parallel=opt$threads)
BAMFiles <- strsplit(c(bamFile), " ")[[1]]
bamDataRanges <- tryCatch(
{
getReadCountsFromBAM(BAMFiles, mode="paired", refSeqName=chromosome, WL=windowLength, parallel=opt$threads)
},
error = function(e) {
quit("no")
},
warning = function(w) {
quit("no")
},
finally = {
}
)
write.table(as.data.frame( bamDataRanges ), quote = FALSE, opt$rawoutput, row.names=FALSE)
......@@ -43,7 +64,7 @@ dir.create(chromosome, showWarnings=FALSE, recursive=TRUE, mode="0744")
# Plot chromosome per sample.
for ( i in 1:length(BAMFiles)){
png(file=paste(chromosome,"/",chromosome,"-segplot-",i,".png", sep=""),
png(file=paste(dirname(opt$rawoutput),"/",chromosome,"-segplot-",i,".png", sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white"
)
......@@ -56,7 +77,7 @@ for ( i in 1:length(BAMFiles)){
# Plot cnvr regions.
for ( i in 1:nrow(as.data.frame(cnvr(res)))) {
png(file=paste(chromosome,"/",chromosome,"-cnv-",i,".png",sep=""),
png(file=paste(dirname(opt$rawoutput),"/",chromosome,"-cnv-",i,".png",sep=""),
width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(1,1))
......
......@@ -4,7 +4,6 @@ library('naturalsort')
# Script taken from http://bioinfo-out.curie.fr/projects/freec/tutorial.html and modified for biopet
option_list <- list(
make_option(c("-m", "--mappability"), dest="mappability"),
make_option(c("-p", "--ploidy"), default=2, type="integer", dest="ploidy"),
make_option(c("-i", "--input"), dest="input"),
make_option(c("-o", "--output"), dest="output")
......@@ -14,17 +13,6 @@ parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
opt = parse_args(parser)
#
# Load mappability track
#
mappabilityFile <- opt$mappability
mappabilityTrack <- read.table(mappabilityFile, header=FALSE, col.names=c("chrom", "start", "end", "score"))
mappabilityTrack$Start <- mappabilityTrack$start+1
mappabilityTrack$Chromosome <- gsub("chr", "", mappabilityTrack$chrom)
#
# Load Data
#
......@@ -37,9 +25,7 @@ chromosomes <- naturalsort(levels(input_ratio$Chromosome))
input_ratio$Chromosome <- factor(input_ratio$Chromosome, levels=chromosomes, ordered=T)
sorted_ratio <- input_ratio[order(input_ratio$Chromosome),]
ratio <- merge(sorted_ratio, mappabilityTrack, sort=TRUE)
ratio <- ratio[order(ratio$Chromosome, ratio$Start),]
ratio <- input_ratio[order(input_ratio$Chromosome, input_ratio$Start),]
ploidy <- opt$ploidy
ppi <- 300
......@@ -99,15 +85,6 @@ for (i in chromosomes) {
dev.off()
}
png(filename = paste(opt$output, ".png",sep=""), width = 16 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
par(mfrow = c(6,4))
......@@ -151,7 +128,7 @@ dev.off()
# Export the whole genome graph
png(filename = paste(opt$output, ".wg.png",sep=""), width = 16 * ppi, height = 10 * ppi,
png(filename = paste(opt$output, ".wg.png",sep=""), width = 50 * ppi, height = 10 * ppi,
res=ppi, bg = "white")
plot_margins <- c(3,4,2,2)+0.1
......
import argparse
from os.path import join
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
class BamRatioReader(object):
"""
Reader object for bam_ratio.txt files
"""
def __init__(self, filename):
self.filename = filename
self.__handle = open(filename)
def __reset_handle(self):
self.__handle.close()
self.__handle = open(self.filename)
def get_chromosome(self, chromosome):
self.__reset_handle()
lines = []
for line in self.__handle:
if line.split("\t")[0] == chromosome:
lines.append(line)
return lines
@property
def chromosomes(self):
self.__reset_handle()
chrs = []
for x in self.__handle:
if "Chromosome" not in x:
chrs.append(x.split("\t")[0])
return list(set(chrs))
def get_cn_lines(lines, cn_type="diploid" ,ploidy=2):
"""
For a list if lines, return those lines belonging to a certain cn_typle
:param lines: list of lines
:param cn_type: either "normal" (cn= ploidy), "loss" (cn < ploidy) or "gain" (cn > ploidy)
:return: list of lines
"""
n_lines = []
for line in lines:
cn = line.strip().split("\t")[-1]
if cn_type == "normal" and int(cn) == ploidy:
n_lines.append(line)
elif cn_type == "loss" and int(cn) < ploidy:
n_lines.append(line)
elif cn_type == "gain" and int(cn) > ploidy:
n_lines.append(line)
return n_lines
def lines_to_array(lines, ploidy=2):
"""
Convert list of lines to numpy array of [start, ratio]
"""
tmp = []
for x in lines:
start = x.split("\t")[1]
ratio = x.split("\t")[2]
tmp.append([int(start), float(ratio)*ploidy])
return np.array(tmp)
def plot_chromosome(lines, chromosome, output_file, ploidy):
"""
Plot lines belonging to a chromosome
green = where CN = ploidy
red = where CN > ploidy
blue = where CN < ploidy
"""
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
normals = lines_to_array(get_cn_lines(lines, "normal", ploidy), ploidy)
losses = lines_to_array(get_cn_lines(lines, "loss", ploidy), ploidy)
gains = lines_to_array(get_cn_lines(lines, "gain", ploidy), ploidy)
print("Plotting chromosome {0}".format(chromosome))
all_x = []
if len(normals) > 0:
ax.scatter(normals[:, 0], normals[:,1], color="g")
for x in normals[:, 0]:
all_x += [x]
if len(losses) > 0:
ax.scatter(losses[:, 0], losses[:,1], color="b")
for x in losses[:, 0]:
all_x += [x]
if len(gains) > 0:
for x in gains[:, 0]:
all_x += [x]
ax.scatter(gains[:,0], gains[:,1], color="r")
ax.set_ylim(0, ploidy*3)
ax.set_xlim(int(0-(max(all_x)*0.1)), int(max(all_x)+(max(all_x)*0.1)))
ax.set_xlabel("chromosome position")
ax.set_ylabel("CN")
ax.set_title("Chromosome {0}".format(chromosome))
plt.savefig(output_file)
plt.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-I', "--input", required=True, help="Input bam_ratio.txt file")
parser.add_argument('-O', '--output-prefix', required=True, help="Path to output prefix")
parser.add_argument("-p", "--ploidy", type=int, default=2, help="Ploidy of sample")
args = parser.parse_args()
reader = BamRatioReader(args.input)
for chromosome in reader.chromosomes:
ofile = args.output_prefix + "." + "chr{0}.png".format(chromosome)
lines = reader.get_chromosome(chromosome)
plot_chromosome(lines, chromosome, ofile, args.ploidy)
\ No newline at end of file
......@@ -16,43 +16,102 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.Version
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.commandline._
import nl.lumc.sasc.biopet.utils.getSemanticVersion
/**
* Wrapper for the Cnmops command line tool.
* Written based on Cnmops version v2.2.1.
*/
class Cnmops(val root: Configurable) extends RscriptCommandLineFunction {
class Cnmops(val root: Configurable) extends RscriptCommandLineFunction with Version {
override def defaultThreads = 4
override def defaultCoreMemory: Double = 4.0
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/cnmops.R")
def versionCommand = {
val v = super.cmdLine + "--version"
v.trim.replace("'", "")
}
def versionRegex = "(\\d+\\.\\d+\\.\\d+)".r
private def stringToInt(s: String): Option[Int] = {
try {
Some(s.toInt)
} catch {
case e: Exception => None
}
}
/**
* Check whether version of cn mops is at least 1.18.0
*
* @return
*/
def versionCheck: Boolean = {
getVersion.flatMap(getSemanticVersion(_)) match {
case Some(version) => (version.major == 1 && version.minor >= 18) || version.major >= 2
case _ => false
}
}
@Input(doc = "Input file BAM", required = true)
var input: List[File] = List()
@Argument(doc = "Chromsomosome to query", required = true)
var chromosome: String = _
@Argument(doc = "Window length", required = false)
var windowLength: Int = config("window_length", namespace = "kopisu", default = 1000)
// output files, computed automatically from output directory
@Output(doc = "Output CNV file")
private lazy val outputCnv: File = {
require(outputDir == null, "Unexpected error when trying to set cn.MOPS CNV output")
new File(outputDir, "cnv.txt")
lazy val outputCnv: File = {
outputDir match {
case Some(dir) => new File(dir, "cnv.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS CNV output")
}
}
@Output(doc = "Output CNR file")
private lazy val outputCnr: File = {
require(outputDir == null, "Unexpected error when trying to set cn.MOPS CNR output")
new File(outputDir, "cnr.txt")
lazy val outputCnr: File = {
outputDir match {
case Some(dir) => new File(dir, "cnr.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS CNR output")
}
}
@Output(doc = "Raw output")
lazy val rawOutput: File = {
outputDir match {
case Some(dir) => new File(dir, "rawoutput.txt")
case _ => throw new IllegalArgumentException("Unexpected error when trying to set cn.MOPS raw output")
}
}
/** write all output files to this directory [./] */
var outputDir: String = _
var outputDir: Option[File] = None
override def beforeGraph = {
super.beforeGraph
require(!outputDir.isEmpty, "Outputdir for cn.MOPS should not be empty")
require(outputDir.isDefined, "Outputdir for cn.MOPS should not be empty")
require(input.length >= 2, "Please supply at least 2 BAM files for cn.MOPS")
if (!versionCheck) {
logger.warn("cn.mops version is below 1.18.0. Contigs containing little to no reads WILL fail")
}
}
override def cmdLine = super.cmdLine +
required(input.foreach(f => f.getAbsolutePath).toString.mkString(" "))
required("--cnr", outputCnr) +
required("--cnv", outputCnv) +
required("--chr", chromosome) +
required("--rawoutput", rawOutput) +
required("--threads", threads) +
optional("--wl", windowLength) +
repeat(input)
}
......@@ -29,7 +29,6 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input file", required = true)
var input: List[File] = Nil
@Argument(doc = "Set flanking amount")
var flanking: Option[Int] = config("flanking")
var exclusionRegions: List[File] = config("exclusion_regions", default = Nil)
......
......@@ -26,6 +26,50 @@ class Centrifuge(val root: Configurable) extends BiopetCommandLineFunction with
@Output(doc = "Output with hits per sequence")
var report: Option[File] = None
@Output(required = false)
var un: Option[File] = None
@Output(required = false)
var al: Option[File] = None
@Output(required = false)
var unConc: Option[File] = None
@Output(required = false)
var alConc: Option[File] = None
@Output(required = false)
var metFile: Option[File] = None
// Input args
var q: Boolean = config("q", default = false)
var qseq: Boolean = config("qseq", default = false)
var f: Boolean = config("f", default = false)
var r: Boolean = config("r", default = false)
var c: Boolean = config("c", default = false)
var skip: Option[Int] = config("skip")
var upto: Option[Int] = config("upto")
var trim5: Option[Int] = config("trim5")
var trim3: Option[Int] = config("trim3")
var phred33: Boolean = config("phred33", default = false)
var phred64: Boolean = config("phred64", default = false)
var intQuals: Boolean = config("int_quals", default = false)
var ignoreQuals: Boolean = config("ignore_quals", default = false)
var nofw: Boolean = config("nofw", default = false)
var norc: Boolean = config("norc", default = false)
// Classification args
var minHitlen: Option[Int] = config("min_hitlen")
var minTotallen: Option[Int] = config("min_totallen")
var hostTaxids: List[Int] = config("host_taxids", default = Nil)
var excludeTaxids: List[Int] = config("exclude_taxids", default = Nil)
// Output args
var t: Boolean = config("t", default = false)
var quiet: Boolean = config("quiet", default = false)
var metStderr: Boolean = config("met_stderr", default = false)
var met: Option[Int] = config("met")
override def defaultThreads = 8
executable = config("exe", default = "centrifuge", freeVar = false)
......@@ -49,7 +93,34 @@ class Centrifuge(val root: Configurable) extends BiopetCommandLineFunction with
* @return Command to run
*/
def cmdLine: String = executable +
//TODO: Options
conditional(q, "-q") +
conditional(qseq, "--qseq") +
conditional(f, "-f") +
conditional(r, "-r") +
conditional(c, "-c") +
optional("--skip", skip) +
optional("--upto", upto) +
optional("--trim5", trim5) +
optional("--trim3", trim3) +
conditional(phred33, "--phred33") +
conditional(phred64, "--phred64") +
conditional(intQuals, "--int-quals") +
conditional(ignoreQuals, "--ignore-quals") +
conditional(nofw, "--nofw") +
conditional(norc, "--norc") +
optional("--min-hitlen", minHitlen) +
optional("--min-totallen", minTotallen) +
optional("--host-taxids", if (hostTaxids.nonEmpty) Some(hostTaxids.mkString(",")) else None) +
optional("--exclude-taxids", if (excludeTaxids.nonEmpty) Some(excludeTaxids.mkString(",")) else None) +
optional("--met-file", metFile) +
conditional(t, "-t") +
conditional(quiet, "--quiet") +
conditional(metStderr, "--met-stderr") +
optional("--met", met) +
optional(if (un.exists(_.getName.endsWith(".gz"))) "--un-gz" else "--un", un) +
optional(if (al.exists(_.getName.endsWith(".gz"))) "--al-gz" else "--al", al) +
optional(if (unConc.exists(_.getName.endsWith(".gz"))) "--un-conc-gz" else "--un-conc", unConc) +
optional(if (alConc.exists(_.getName.endsWith(".gz"))) "--al-conc-gz" else "--al-conc", alConc) +
optional("--threads", threads) +
required("-x", index) +
(inputR2 match {
......
......@@ -22,6 +22,8 @@ import org.broadinstitute.gatk.utils.commandline._
class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
override def defaults = Map("max_walltime_limit" -> 7200)
@Input(doc = "BAMfile", required = true)
var input: File = _
......@@ -125,7 +127,7 @@ class FreeC(val root: Configurable) extends BiopetCommandLineFunction with Refer
override def versionCommand = executable
override def versionRegex = """Control-FREEC v([0-9\.]+) : .*""".r
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
override def defaultCoreMemory = 50
private var configFile: File = _
......
......@@ -16,12 +16,12 @@ package nl.lumc.sasc.biopet.extensions.freec
import java.io.File
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.core.extensions.{ PythonCommandLineFunction, RscriptCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction {
protected var script: File = new File("/nl/lumc/sasc/biopet/extensions/freec/freec_CNVPlot.R")
class FreeCCNVPlot(val root: Configurable) extends PythonCommandLineFunction {
setPythonScript("freec_CNVPlot.py")
@Input(doc = "Output file from FreeC. *_CNV", required = true)
var input: File = null
......@@ -29,12 +29,7 @@ class FreeCCNVPlot(val root: Configurable) extends RscriptCommandLineFunction {
@Output(doc = "Destination for the PNG file", required = true)
var output: File = null
/**
* cmdLine to execute R-script and with arguments
* Arguments should be pasted in the same order as the script is expecting it.
* Unless some R library is used for named arguments
*/
override def cmdLine = super.cmdLine +
required("-i", input) +
required("-o", output)
override def cmdLine = getPythonCommand +
required("-I", input) +
required("-O", output)
}
package nl.lumc.sasc.biopet.extensions
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by Sander Bollen on 17-10-16.
*/
class CnmopsTest extends TestNGSuite with Matchers {
@Test
def testVersionCommand() = {
val cn = new Cnmops(null)
cn.versionCommand.endsWith("--version") shouldBe true
cn.versionCommand.split(" ").head.endsWith("Rscript") shouldBe true
}
}
......@@ -31,6 +31,7 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.ExtractAlignedFastq,
nl.lumc.sasc.biopet.tools.FastqSplitter,
nl.lumc.sasc.biopet.tools.FastqSync,
nl.lumc.sasc.biopet.tools.FastqFilter,
nl.lumc.sasc.biopet.tools.FindRepeatsPacBio,
nl.lumc.sasc.biopet.tools.FindOverlapMatch,