Commit 4355447e authored by Peter van 't Hof
Browse files

Merge branch 'waiyi' into 'master'

Waiyi

merge for ticket #54

See merge request !12
parents 4eff4dde fb266355
......@@ -21,9 +21,11 @@ Any value defined here will override the pom.xml file value but is only applicab
<netbeans.hint.license>apache20</netbeans.hint.license>
<com-junichi11-netbeans-changelf.enable>true</com-junichi11-netbeans-changelf.enable>
<com-junichi11-netbeans-changelf.use-project>true</com-junichi11-netbeans-changelf.use-project>
<com-junichi11-netbeans-changelf.lf-kind>CRLF</com-junichi11-netbeans-changelf.lf-kind>
<com-junichi11-netbeans-changelf.lf-kind>LF</com-junichi11-netbeans-changelf.lf-kind>
<com-junichi11-netbeans-changelf.use-global>false</com-junichi11-netbeans-changelf.use-global>
<netbeans.checkstyle.format>true</netbeans.checkstyle.format>
<com-junichi11-netbeans-changelf.show-dialog>true</com-junichi11-netbeans-changelf.show-dialog>
<org-netbeans-modules-javascript2-requirejs.enabled>true</org-netbeans-modules-javascript2-requirejs.enabled>
<netbeans.hint.jdkPlatform>JDK_1.8</netbeans.hint.jdkPlatform>
</properties>
</project-shared-configuration>
......@@ -2,6 +2,7 @@
<actions>
<action>
<actionName>run</actionName>
<preAction>build-with-dependencies</preAction>
<packagings>
<packaging>jar</packaging>
</packagings>
......
......@@ -207,6 +207,14 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<showDeprecation>true</showDeprecation>
</configuration>
</plugin>
</plugins>
</build>
</project>
package nl.lumc.sasc.biopet.scripts
package nl.lumc.sasc.biopet.extensions
import java.io.File
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/*
* Wrapper around the seqstat implemented in D
*
*/
import argonaut._, Argonaut._
import scalaz._, Scalaz._
import scala.io.Source
import scala.collection.mutable.Map
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
class Seqstat(val root: Configurable) extends PythonCommandLineFunction {
setPythonScript("__init__.py", "pyfastqc/")
setPythonScript("seq_stat.py")
class Seqstat(val root: Configurable) extends BiopetCommandLineFunction {
override val defaultVmem = "4G"
@Input(doc = "Fastq input", shortName = "fastqc", required = true)
var input_fastq: File = _
@Input(doc = "Input FastQ", required = true)
var input: File = _
@Output(doc = "Output file", shortName = "out", required = true)
var out: File = _
@Output(doc = "JSON summary", required = true)
var output: File = _
var fmt: String = _
executable = config("exe", default = "fastq-seqstat")
def cmdLine = {
getPythonCommand +
optional("--fmt", fmt) +
required("-o", out) +
required(input_fastq)
}
def cmdLine = required(executable) + required(input) + " > " + required(output)
def getSummary: Json = {
val json = Parse.parseOption(Source.fromFile(out).mkString)
val json = Parse.parseOption(Source.fromFile(output).mkString)
if (json.isEmpty) return jNull
else return json.get.fieldOrEmptyObject("stats")
}
}
object Seqstat {
def apply(root: Configurable, input: File, output: File): Seqstat = {
val seqstat = new Seqstat(root)
seqstat.input = input
seqstat.output = output
return seqstat
}
def apply(root: Configurable, fastqfile: File, outDir: String): Seqstat = {
val seqstat = new Seqstat(root)
val ext = fastqfile.getName.substring(fastqfile.getName.lastIndexOf("."))
seqstat.input_fastq = fastqfile
seqstat.out = new File(outDir + fastqfile.getName.substring(0, fastqfile.getName.lastIndexOf(".")) + ".seqstats.json")
seqstat.input = fastqfile
seqstat.output = new File(outDir + fastqfile.getName.substring(0, fastqfile.getName.lastIndexOf(".")) + ".seqstats.json")
return seqstat
}
......
......@@ -24,9 +24,8 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
@Output(doc = "Output file SAM", shortName = "output")
var output: File = _
// options set via API or config
// var numrecords: String = config("numrecords", default = "all")
// var numrecords: String = config("numrecords", default = "all")
var solexa: Boolean = config("solexa", default = false)
var solexaold: Boolean = config("solexaold", default = false)
var sanger: Boolean = config("sanger", default = false)
......@@ -53,7 +52,7 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
override def versionCommand = executable + " --help"
def cmdLine : String = {
def cmdLine: String = {
var cmd: String = required(executable) +
optional("-t", nCoresRequest) +
conditional(solexa, "--solexa") +
......@@ -65,7 +64,7 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
// Optionally start Mate Pair alignment, if set, the aligner will
// assign MP reads as MP, otherwise in PE mode, these reads will
// be aligned with the bits RR or FF showing a False Inversion event
if ( insertsd2.getOrElse(-1) != -1 ) {
if (insertsd2.getOrElse(-1) != -1) {
cmd += optional("--insertsize2", insertsize2) +
optional("--insertsd2", insertsd2)
}
......
package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
/**
 * Shared base class for the sambamba command wrappers in this package.
 *
 * Sets the default resource requests, the executable lookup and the settings
 * used for version detection; the concrete sub-commands (flagstat, index,
 * markdup, merge) extend this class and supply their own `cmdLine`.
 */
abstract class Sambamba extends BiopetCommandLineFunction {
// Default memory and thread requests; SambambaMarkdup and SambambaMerge override defaultThreads to 4.
override val defaultVmem = "4G"
override val defaultThreads = 2
// Executable is looked up under config key "exe" with "sambamba" as the default value.
// NOTE(review): the meaning of `submodule` / `freeVar` is defined by `config` outside this file - confirm.
executable = config("exe", default = "sambamba", submodule = "sambamba", freeVar = false)
// Version detection runs the bare executable and captures the text after "sambamba v".
override def versionCommand = executable
override val versionRegex = """sambamba v(.*)""".r
// Presumably the exit codes accepted from the version command - verify against BiopetCommandLineFunction.
override val versionExitcode = List(0, 1)
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
 * Wrapper for the `sambamba flagstat` sub-command.
 *
 * The generated command redirects standard output into [[output]].
 *
 * @param root configuration parent passed in by the caller
 */
class SambambaFlagstat(val root: Configurable) extends Sambamba {
  override val defaultThreads = 2

  @Input(doc = "Bam File")
  var input: File = _

  @Output(doc = "output File")
  var output: File = _

  /** Builds `exe flagstat [-t threads] input > output`. */
  def cmdLine = Seq(
    required(executable),
    required("flagstat"),
    optional("-t", nCoresRequest),
    required(input),
    " > ",
    required(output)
  ).mkString
}
/** Factory methods for [[SambambaFlagstat]] jobs. */
object SambambaFlagstat {
  /**
   * Creates a flagstat job with an explicit output file.
   *
   * @param root   configuration parent
   * @param input  BAM file to inspect
   * @param output file receiving the flagstat report
   */
  def apply(root: Configurable, input: File, output: File): SambambaFlagstat = {
    val job = new SambambaFlagstat(root)
    job.input = input
    job.output = output
    job
  }

  /**
   * Creates a flagstat job whose output is placed in `outputDir`, named after
   * the input file with `.bam` swapped for `.flagstat`.
   */
  def apply(root: Configurable, input: File, outputDir: String): SambambaFlagstat = {
    // Guarantee exactly the same trailing-slash handling as a plain "append if missing".
    val prefix = outputDir.stripSuffix("/") + "/"
    apply(root, input, new File(prefix + swapExtension(input.getName)))
  }

  /** Creates a flagstat job writing next to the input (absolute path, `.bam` -> `.flagstat`). */
  def apply(root: Configurable, input: File): SambambaFlagstat =
    apply(root, input, new File(swapExtension(input.getAbsolutePath)))

  /** Drops a trailing `.bam` (if present) and appends `.flagstat`. */
  private def swapExtension(inputFile: String) = s"${inputFile.stripSuffix(".bam")}.flagstat"
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
 * Wrapper for the `sambamba index` sub-command.
 *
 * @param root configuration parent passed in by the caller
 */
class SambambaIndex(val root: Configurable) extends Sambamba {
  override val defaultThreads = 2

  @Input(doc = "Bam File")
  var input: File = _

  @Output(doc = "Output .bai file to")
  var output: File = _

  /** Builds `exe index [-t threads] input output`. */
  def cmdLine = Seq(
    required(executable),
    required("index"),
    optional("-t", nCoresRequest),
    required(input),
    required(output)
  ).mkString
}
/** Factory methods for [[SambambaIndex]] jobs. */
object SambambaIndex {
  /**
   * Creates an index job with an explicit index file.
   *
   * @param root   configuration parent
   * @param input  BAM file to index
   * @param output path of the index file to write
   */
  def apply(root: Configurable, input: File, output: File): SambambaIndex = {
    val indexJob = new SambambaIndex(root)
    indexJob.input = input
    indexJob.output = output
    indexJob
  }

  /**
   * Creates an index job whose index is placed in `outputDir`, named after
   * the input file with a `.bam.bai` ending.
   */
  def apply(root: Configurable, input: File, outputDir: String): SambambaIndex = {
    val prefix = outputDir.stripSuffix("/") + "/"
    apply(root, input, new File(prefix + swapExtension(input.getName)))
  }

  /** Creates an index job writing the index next to the input (absolute path). */
  def apply(root: Configurable, input: File): SambambaIndex =
    apply(root, input, new File(swapExtension(input.getAbsolutePath)))

  /** Drops a trailing `.bam` (if present) and appends `.bam.bai`. */
  private def swapExtension(inputFile: String) = s"${inputFile.stripSuffix(".bam")}.bam.bai"
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
 * Wrapper for the `sambamba markdup` sub-command.
 *
 * Tuning values are read from the configuration when the instance is created.
 *
 * @param root configuration parent passed in by the caller
 */
class SambambaMarkdup(val root: Configurable) extends Sambamba {
  override val defaultThreads = 4

  @Input(doc = "Bam File")
  var input: File = _

  @Output(doc = "Markdup output bam")
  var output: File = _

  // When true, `--remove-duplicates` is added to the command line.
  var remove_duplicates: Boolean = config("remove_duplicates", default = false)

  // @doc: compression_level 6 is average, 0 = no compression, 9 = best
  val compression_level: Option[Int] = config("compression_level", default = 6)
  val hash_table_size: Option[Int] = config("hash-table-size", default = 262144)
  val overflow_list_size: Option[Int] = config("overflow-list-size", default = 200000)
  val io_buffer_size: Option[Int] = config("io-buffer-size", default = 128)

  /** Builds `exe markdup [flags] input output`; the `--x=` options are emitted without a space before the value. */
  def cmdLine = Seq(
    required(executable),
    required("markdup"),
    conditional(remove_duplicates, "--remove-duplicates"),
    optional("-t", nCoresRequest),
    optional("-l", compression_level),
    optional("--hash-table-size=", hash_table_size, spaceSeparated = false),
    optional("--overflow-list-size=", overflow_list_size, spaceSeparated = false),
    optional("--io-buffer-size=", io_buffer_size, spaceSeparated = false),
    required(input),
    required(output)
  ).mkString
}
/** Factory methods for [[SambambaMarkdup]] jobs. */
object SambambaMarkdup {
  /**
   * Creates a markdup job with an explicit output BAM.
   *
   * @param root   configuration parent
   * @param input  BAM file to process
   * @param output BAM file to write
   */
  def apply(root: Configurable, input: File, output: File): SambambaMarkdup = {
    val markdup = new SambambaMarkdup(root)
    markdup.input = input
    markdup.output = output
    markdup
  }

  /**
   * Creates a markdup job whose output BAM is placed in `outputDir`, named
   * after the input file with `.bam` swapped for `.dedup.bam`.
   */
  def apply(root: Configurable, input: File, outputDir: String): SambambaMarkdup = {
    val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/"
    apply(root, input, new File(dir + swapExtension(input.getName)))
  }

  /** Creates a markdup job writing next to the input (absolute path, `.bam` -> `.dedup.bam`). */
  def apply(root: Configurable, input: File): SambambaMarkdup =
    apply(root, input, new File(swapExtension(input.getAbsolutePath)))

  /**
   * Derives the output name for the marked BAM.
   *
   * The previous `.bam.bai` suffix was copied from the index wrapper: it named
   * a BAM like an index and matched the path SambambaIndex derives for the
   * same input. A distinct BAM suffix is used instead.
   */
  private def swapExtension(inputFile: String) = inputFile.stripSuffix(".bam") + ".dedup.bam"
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
 * Wrapper for the `sambamba merge` sub-command.
 *
 * @param root configuration parent passed in by the caller
 */
class SambambaMerge(val root: Configurable) extends Sambamba {
  override val defaultThreads = 4

  @Input(doc = "Bam File[s]")
  var input: List[File] = Nil

  @Output(doc = "Output merged bam PATH")
  var output: File = _

  // @doc: compression_level 6 is average, 0 = no compression, 9 = best
  val compression_level: Option[Int] = config("compression_level", default = 6)

  /** Builds `exe merge [-t threads] [-l level] output input...`; the output path precedes the inputs. */
  def cmdLine = Seq(
    required(executable),
    required("merge"),
    optional("-t", nCoresRequest),
    optional("-l", compression_level),
    required(output),
    repeat("", input)
  ).mkString
}
/** Factory methods for [[SambambaMerge]] jobs. */
object SambambaMerge {
  /**
   * Creates a merge job with an explicit output BAM.
   *
   * @param root   configuration parent
   * @param input  BAM files to merge
   * @param output merged BAM to write
   */
  def apply(root: Configurable, input: List[File], output: File): SambambaMerge = {
    val mergeJob = new SambambaMerge(root)
    mergeJob.input = input
    mergeJob.output = output
    mergeJob
  }

  /**
   * Creates a merge job whose output is placed in `outputDir`, named after the
   * FIRST input file with `.bam` swapped for `.merge.bam`.
   *
   * Uses `input.head`, so an empty list throws `NoSuchElementException`.
   */
  def apply(root: Configurable, input: List[File], outputDir: String): SambambaMerge = {
    val prefix = outputDir.stripSuffix("/") + "/"
    apply(root, input, new File(prefix + swapExtension(input.head.getName)))
  }

  /**
   * Creates a merge job writing next to the first input (absolute path,
   * `.bam` -> `.merge.bam`). An empty list throws `NoSuchElementException`.
   */
  def apply(root: Configurable, input: List[File]): SambambaMerge =
    apply(root, input, new File(swapExtension(input.head.getAbsolutePath)))

  /** Drops a trailing `.bam` (if present) and appends `.merge.bam`. */
  private def swapExtension(inputFile: String) = s"${inputFile.stripSuffix(".bam")}.merge.bam"
}
//
//object MergeSamFiles {
// def apply(root: Configurable, input: List[File], outputDir: String, sortOrder: String = null): MergeSamFiles = {
// val mergeSamFiles = new MergeSamFiles(root)
// mergeSamFiles.input = input
// mergeSamFiles.output = new File(outputDir, input.head.getName.stripSuffix(".bam").stripSuffix(".sam") + ".merge.bam")
// if (sortOrder == null) mergeSamFiles.sortOrder = "coordinate"
// else mergeSamFiles.sortOrder = sortOrder
// return mergeSamFiles
// }
//}
\ No newline at end of file
......@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.extensions.svcallers
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
......@@ -10,33 +11,34 @@ import java.io.File
class BreakdancerConfig(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "bam2cfg.pl", freeVar = false)
@Input(doc = "Bam File")
var input: File = _
@Output(doc = "Output File")
var output: File = _
var MIN_MQ: Option[Int] = config("min_mq", default = 20) // minimum of MQ to consider for taking read into histogram
var USE_MQ: Boolean = config("use_mq", default = false)
var MIN_INSERTSIZE: Option[Int] = config("min_insertsize", default = 450)
var SOLID_DATA: Boolean = config("solid", default = false)
var SD_CUTOFF: Option[Int] = config("sd_cutoff", default = 4) // Cutoff in unit of standard deviation [4]
var min_mq: Option[Int] = config("min_mq", default = 20) // minimum of MQ to consider for taking read into histogram
var use_mq: Boolean = config("use_mq", default = false)
var min_insertsize: Option[Int] = config("min_insertsize", default = 450)
var solid_data: Boolean = config("solid", default = false)
var sd_cutoff: Option[Int] = config("sd_cutoff", default = 4) // Cutoff in unit of standard deviation [4]
// we set this to a higher number to avoid biases in small numbers in sorted bams
var MIN_OBSERVATIONS: Option[Int] = config("min_observations", default = 1000000) // Number of observation required to estimate mean and s.d. insert size [10_000]
var COEFVAR_CUTOFF: Option[Int] = config("coef_cutoff", default = 1) // Cutoff on coefficients of variation [1]
var HISTOGRAM_BINS: Option[Int] = config("histogram_bins", default = 50) // Number of bins in the histogram [50]
var min_observations: Option[Int] = config("min_observations", default = 10000) // Number of observation required to estimate mean and s.d. insert size [10_000]
var coefvar_cutoff: Option[Int] = config("coef_cutoff", default = 1) // Cutoff on coefficients of variation [1]
var histogram_bins: Option[Int] = config("histogram_bins", default = 50) // Number of bins in the histogram [50]
def cmdLine = required(executable) +
optional("-q", MIN_MQ) +
conditional(USE_MQ, "-m") +
optional("-s", MIN_INSERTSIZE) +
conditional(SOLID_DATA, "-s") +
optional("-c", SD_CUTOFF) +
optional("-n", MIN_OBSERVATIONS) +
optional("-v", COEFVAR_CUTOFF) +
optional("-b", HISTOGRAM_BINS)
required(input) + " > " + required(output)
optional("-q", min_mq) +
conditional(use_mq, "-m") +
optional("-s", min_insertsize) +
conditional(solid_data, "-s") +
optional("-c", sd_cutoff) +
optional("-n", min_observations) +
optional("-v", coefvar_cutoff) +
optional("-b", histogram_bins) +
required(input) + " 1> " + required(output)
}
object BreakdancerConfig {
......@@ -63,43 +65,87 @@ object BreakdancerConfig {
/*
* The caller
*
**/
class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "breakdancer-max", freeVar = false)
override val defaultVmem = "4G"
override val defaultThreads = 8
override val defaultThreads = 1 // breakdancer can only work on 1 single thread
override val versionRegex = """.*[Vv]ersion:? (.*)""".r
// override val versionExitcode = List(0, 1)
override val versionExitcode = List(1)
override def versionCommand = executable
@Input(doc = "Input file (bam)")
@Input(doc = "The breakdancer configuration file")
var input: File = _
@Argument(doc = "Work directory")
var workdir: String = _
// @Argument(doc = "Work directory")
// var workdir: String = _
@Output(doc = "Breakdancer VCF output")
var output: File = _
// var T: Option[Int] = config("T", default = defaultThreads)
var f: Boolean = config("f", default = true) // delete work directory before running
// var w: String = config("w", default = workdir + "/work")
var a: Boolean = config("a", default = false) // don't recompute AS tags
var k: Boolean = config("k", default = false) // keep working directory
var r: Boolean = config("r", default = false) // take read groups into account
/*
Options:
-o STRING operate on a single chromosome [all chromosome]
-s INT minimum length of a region [7]
-c INT cutoff in unit of standard deviation [3]
-m INT maximum SV size [1000000000]
-q INT minimum alternative mapping quality [35]
-r INT minimum number of read pairs required to establish a connection [2]
-x INT maximum threshold of haploid sequence coverage for regions to be ignored [1000]
-b INT buffer size for building connection [100]
-t only detect transchromosomal rearrangement, by default off
-d STRING prefix of fastq files that SV supporting reads will be saved by library
-g STRING dump SVs and supporting reads in BED format for GBrowse
-l analyze Illumina long insert (mate-pair) library
-a print out copy number and support reads per library rather than per bam, by default off
-h print out Allele Frequency column, by default off
-y INT output score filter [30]
*/
var s: Option[Int] = config("s", default = 7)
var c: Option[Int] = config("c", default = 3)
var m: Option[Int] = config("m", default = 1000000000)
var q: Option[Int] = config("qs", default = 35)
var r: Option[Int] = config("r", default = 2)
var x: Option[Int] = config("x", default = 1000)
var b: Option[Int] = config("b", default = 100)
var t: Boolean = config("t", default = false)
var d: String = config("d")
var g: String = config("g")
var l: Boolean = config("l", default = false)
var a: Boolean = config("a", default = false)
var h: Boolean = config("h", default = false)
var y: Option[Int] = config("y", default = 30)
override def beforeCmd {
if (workdir == null) throw new Exception("Breakdancer :: Workdirectory is not defined")
// if (input.getName.endsWith(".sort.bam")) sorted = true
}
def cmdLine = required(executable) +
required(input) + ">" + required(output)
optional("-s", s) +
optional("-c", c) +
optional("-m", m) +
optional("-q", q) +
optional("-r", r) +
optional("-x", x) +
optional("-b", b) +
conditional(t ,"-t") +
optional("-d", d) +
optional("-g", g) +
conditional(l ,"-l") +
conditional(a ,"-a") +
conditional(h ,"-h") +
optional("-y", y) +
required(input) +
">" +
required(output)
}
object BreakdancerCaller {
......@@ -124,40 +170,51 @@ class Breakdancer(val root: Configurable) extends QScript with BiopetQScript {
@Argument(doc = "Work directory")
var workdir: String = _
@Output(doc = "Breakdancer VCF output")
var output: File = _
// @Output(doc = "Breakdancer VCF output")
// lazy val outputvcf: File = {
// new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.vcf")
// }
@Output(doc = "Breakdancer config")
lazy val configfile: File = {
new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.cfg")
}
@Output(doc = "Breakdancer raw output")
lazy val outputraw: File = {
new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.tsv")
}
override def init() {
}
def biopetScript() {
// write the pipeline here
// start with QC, alignment, call sambamba, call sv callers, reporting
// read config and set all parameters for the pipeline
logger.info("Starting Breakdancer")
logger.info("Starting Breakdancer configuration")
val bdcfg = BreakdancerConfig(this, input, workdir)