Merge branch 'develop' into feature-vcf_stats

25a8db4f · Peter van 't Hof · 3ce22c45 · 74077f92 · 25a8db4f · 25a8db4f
Commit 25a8db4f authored 10 years ago by Peter van 't Hof
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
+  <component name="EntryPointsManager">
+    <entry_points version="2.0" />
+  </component>
  <component name="MavenProjectsManager">
    <option name="originalFiles">
      <list>

--- a/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
+++ b/protected/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala
@@ -41,7 +41,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
    var outputSnps: FastaOutput = _

    protected def addJobs(): Unit = {
-      addLibsJobs()
+      addPerLibJobs()
      output = addGenerateFasta(sampleId, sampleDir)
      outputSnps = addGenerateFasta(sampleId, sampleDir, snpsOnly = true)
    }
@@ -56,11 +56,13 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
    gatkPipeline.biopetScript
    addAll(gatkPipeline.functions)

+    addSamplesJobs()
+  }
+
+  def addMultiSampleJobs(): Unit = {
    val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference")
    val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true)

-    addSamplesJobs()
-
    val catVariants = Cat(this, refVariants.variants :: samples.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta")
    add(catVariants)
    val catVariantsSnps = Cat(this, refVariantSnps.variants :: samples.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta")
@@ -122,13 +124,14 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {

      val gubbins = new RunGubbins(this)
      gubbins.fastafile = concensusVariants
-      gubbins.startingTree = raxmlBi.getBipartitionsFile
+      gubbins.startingTree = Some(raxmlBi.getBipartitionsFile)
      gubbins.outputDirectory = outputDir + dirSufixGubbins
      add(gubbins)
    }

    addTreeJobs(catVariantsSnps.output, catConsensusVariantsSnps.output, outputDir + "trees" + File.separator + "snps_only", "snps_only")
    addTreeJobs(catVariants.output, catConsensusVariants.output, outputDir + "trees" + File.separator + "snps_indels", "snps_indels")
+
  }

  def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null,

--- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala
+++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala
@@ -12,7 +12,7 @@ class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.q
  memoryLimit = Option(4)
  override val defaultVmem = "8G"

-  if (config.contains("scattercount")) scatterCount = config("scattercount")
+  if (config.contains("scattercount")) scatterCount = config("scattercount", default = 1)
  if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString)
  if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString)
 }

--- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala
+++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala
@@ -19,7 +19,7 @@ trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {

  if (config.contains("intervals")) intervals = config("intervals").asFileList
  if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList
-  reference_sequence = config("reference")
-  gatk_key = config("gatk_key")
+  reference_sequence = config("reference", required = true)
+  if (config.contains("gatk_key")) gatk_key = config("gatk_key")
  if (config.contains("pedigree")) pedigree = config("pedigree").asFileList
 }
--- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
+++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala
@@ -36,7 +36,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri

  var singleSampleCalling = config("single_sample_calling", default = true)
  var reference: File = config("reference", required = true)
-  var dbsnp: File = config("dbsnp")
  var useAllelesOption: Boolean = config("use_alleles_option", default = false)
  val externalGvcfs = config("external_gvcfs_files", default = Nil).asFileList

@@ -72,7 +71,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
            samToFastq.isIntermediate = true
            qscript.add(samToFastq)
            mapping.input_R1 = samToFastq.fastqR1
-            mapping.input_R2 = samToFastq.fastqR2
+            mapping.input_R2 = Some(samToFastq.fastqR2)
            mapping.init
            mapping.biopetScript
            addAll(mapping.functions) // Add functions of mapping to curent function pool
@@ -127,7 +126,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
    gatkVariantcalling.outputDir = sampleDir + "/variantcalling/"

    protected def addJobs(): Unit = {
-      addLibsJobs()
+      addPerLibJobs()
      gatkVariantcalling.inputBams = libraries.map(_._2.mapping.finalBamFile).toList
      gatkVariantcalling.preProcesBams = false
      if (!singleSampleCalling) {
@@ -150,10 +149,11 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
    override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
  }

-  def biopetScript() {
-    addSamplesJobs
+  def biopetScript(): Unit = {
+    addSamplesJobs()
+  }

-    //SampleWide jobs
+  def addMultiSampleJobs(): Unit = {
    val gvcfFiles: List[File] = if (mergeGvcfs && externalGvcfs.size + samples.size > 1) {
      val newFile = outputDir + "merged.gvcf.vcf.gz"
      add(CombineGVCFs(this, externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile), newFile))

--- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
+++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala
@@ -32,9 +32,6 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
  @Argument(doc = "Reference", shortName = "R", required = false)
  var reference: File = config("reference", required = true)

-  @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
-  var dbsnp: File = config("dbsnp")
-
  @Argument(doc = "OutputName", required = false)
  var outputName: String = _


--- a/public/biopet-framework/src/main/resources/nl/lumc/sasc/biopet/scripts/sync_paired_end_reads.py
+++ b/public/biopet-framework/src/main/resources/nl/lumc/sasc/biopet/scripts/sync_paired_end_reads.py
-#!/usr/bin/env python
-#
-# Biopet is built on top of GATK Queue for building bioinformatic
-# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
-# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
-# should also be able to execute Biopet tools and pipelines.
-#
-# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
-#
-# Contact us at: sasc@lumc.nl
-#
-# A dual licensing mode is applied. The source code within this project that are
-# not part of GATK Queue is freely available for non-commercial use under an AGPL
-# license; For commercial users or users who do not want to follow the AGPL
-# license, please contact us to obtain a separate license.
-#
-
-"""
-(Re-)sync two filtered paired end FASTQ files.
-
-Given two filtered paired end read files and one of the original read files,
-re-sync the filtered reads by filtering out anything that is only present in
-one of the two files.
-
-Usage:
-  {command} <orig.fq> <reads_1.fq> <reads_2.fq> \\
-      <reads_1.synced.fq> <reads_2.synced.fq>
-
-The synced reads are written to disk as <reads_1.synced.fq> and
-<reads_2.synced.fq>. Afterwards some counts are printed.
-
-
-Both Illumina old-style and new-style paired-end header lines are supported.
-
-The original read file is used to speed up processing: it contains all
-possible reads from both edited reads (in all files in the same order) so it
-can process all files line by line, not having to read a single file in
-memory. Some ideas were taken from [1].
-
-[1] https://gist.github.com/588841/
-
-2011-11-03, Martijn Vermaat <m.vermaat.hg@lumc.nl>
-"""
-
-
-import sys
-import re
-
-
-def sync_paired_end_reads(original, reads_a, reads_b, synced_a, synced_b):
-    """
-    Filter out reads from two paired end read files that are not present in
-    both of them. Do this in a reasonable amount of time by using a file
-    containing all of the reads for one of the paired ends.
-
-    All arguments are open file handles.
-
-    @arg original: File containing all original reads for one of the paired
-                   ends.
-    @arg reads_a:  First from paired end read files.
-    @arg reads_b:  Second from paired end read files.
-    @arg synced_a: Filtered reads from first paired end read file.
-    @arg synced_b: Filtered reads from second paired end read file.
-
-    @return:       Triple (filtered_a, filtered_b, kept) containing counts
-                   of the number of reads filtered from both input files and
-                   the total number of reads kept in the synced results.
-
-    @todo: Print warnings if obvious things are not right (a or b still has
-           lines after original is processed).
-    """
-    # This matches 1, 2, or 3 preceded by / _ or whitespace. Its rightmost
-    # match in a header line is used to identify the read pair.
-    sep = re.compile('[\s_/][123]')
-
-    def next_record(fh):
-        return [fh.readline().strip() for i in range(4)]
-
-    def head(record):
-        return sep.split(record[0])[:-1]
-
-    headers = (sep.split(x.strip())[:-1] for i, x in enumerate(original)
-               if not (i % 4))
-
-    filtered_a = filtered_b = kept = 0
-
-    a, b = next_record(reads_a), next_record(reads_b)
-
-    for header in headers:
-        if header == head(a) and head(b) != header:
-            a = next_record(reads_a)
-            filtered_a += 1
-
-        if header == head(b) and head(a) != header:
-            b = next_record(reads_b)
-            filtered_b += 1
-
-        if header == head(a) == head(b):
-            print >>synced_a, '\n'.join(a)
-            print >>synced_b, '\n'.join(b)
-            a, b = next_record(reads_a), next_record(reads_b)
-            kept += 1
-
-    return filtered_a, filtered_b, kept
-
-
-if __name__ == '__main__':
-    if len(sys.argv) < 6:
-        sys.stderr.write(__doc__.split('\n\n\n')[0].strip().format(
-            command=sys.argv[0]) + '\n')
-        sys.exit(1)
-    try:
-        original = open(sys.argv[1], 'r')
-        reads_a = open(sys.argv[2], 'r')
-        reads_b = open(sys.argv[3], 'r')
-        synced_a = open(sys.argv[4], 'w')
-        synced_b = open(sys.argv[5], 'w')
-        filtered_a, filtered_b, kept = \
-                    sync_paired_end_reads(original, reads_a, reads_b,
-                                          synced_a, synced_b)
-        print 'Filtered %i reads from first read file.' % filtered_a
-        print 'Filtered %i reads from second read file.' % filtered_b
-        print 'Synced read files contain %i reads.' % kept
-    except IOError as (_, message):
-        sys.stderr.write('Error: %s\n' % message)
-        sys.exit(1)
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala
@@ -38,7 +38,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
  val defaultThreads = 1

  @Argument(doc = "Vmem", required = false)
-  var vmem: String = _
+  var vmem: Option[String] = None
  val defaultVmem: String = ""

  @Argument(doc = "Executable", required = false)
@@ -58,9 +58,9 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
    if (threads == 0) threads = getThreads(defaultThreads)
    if (threads > 1) nCoresRequest = Option(threads)

-    if (vmem == null) {
+    if (vmem.isEmpty) {
      vmem = config("vmem")
-      if (vmem == null && !defaultVmem.isEmpty) vmem = defaultVmem
+      if (vmem.isEmpty && defaultVmem.nonEmpty) vmem = Some(defaultVmem)
    }
    if (vmem != null) jobResourceRequests :+= "h_vmem=" + vmem
    jobName = configName + ":" + firstOutput.getName

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala
@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.core

 import java.io.File
 import java.io.PrintWriter
-import nl.lumc.sasc.biopet.core.config.{ Config, Configurable }
+import nl.lumc.sasc.biopet.core.config.{ ConfigValueIndex, Config, Configurable }
 import org.broadinstitute.gatk.utils.commandline.Argument
 import org.broadinstitute.gatk.queue.QSettings
 import org.broadinstitute.gatk.queue.function.QFunction
@@ -29,7 +29,14 @@ trait BiopetQScript extends Configurable with GatkLogging {
  @Argument(doc = "JSON config file(s)", fullName = "config_file", shortName = "config", required = false)
  val configfiles: List[File] = Nil

-  var outputDir: String = _
+  var outputDir: String = {
+    val temp = Config.getValueFromMap(Config.global.map, ConfigValueIndex(this.configName, configPath, "output_dir"))
+    if (temp.isEmpty) throw new IllegalArgumentException("No output_dir defined in config")
+    else {
+      val t = temp.get.value.toString
+      if (!t.endsWith("/")) t + "/" else t
+    }
+  }

  @Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
  var disableScatter: Boolean = false

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala
@@ -26,7 +26,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Argument }
 */
 trait MultiSampleQScript extends BiopetQScript {
  @Argument(doc = "Only Sample", shortName = "sample", required = false)
-  val onlySample: List[String] = Nil
+  private val onlySamples: List[String] = Nil

  require(Config.global.map.contains("samples"), "No Samples found in config")

@@ -94,7 +94,7 @@ trait MultiSampleQScript extends BiopetQScript {
    protected def addJobs()

    /** function add all libraries in one call */
-    protected final def addLibsJobs(): Unit = {
+    protected final def addPerLibJobs(): Unit = {
      for ((libraryId, library) <- libraries) {
        library.addAndTrackJobs()
      }
@@ -108,7 +108,7 @@ trait MultiSampleQScript extends BiopetQScript {
    def createFile(suffix: String) = new File(sampleDir, sampleId + suffix)

    /** Returns sample directory */
-    def sampleDir = outputDir + "samples" + File.pathSeparator + sampleId + File.pathSeparator
+    def sampleDir = outputDir + "samples" + File.separator + sampleId + File.separator
  }

  /** Sample type, need implementation in pipeline */
@@ -125,17 +125,24 @@ trait MultiSampleQScript extends BiopetQScript {
  val samples: Map[String, Sample] = sampleIds.map(id => id -> makeSample(id)).toMap

  /** Returns a list of all sampleIDs */
-  protected def sampleIds: Set[String] = if (onlySample != Nil) onlySample.toSet else {
-    ConfigUtils.any2map(Config.global.map("samples")).keySet
-  }
+  protected def sampleIds: Set[String] = ConfigUtils.any2map(Config.global.map("samples")).keySet

  /** Runs addAndTrackJobs method for each sample */
  final def addSamplesJobs() {
-    for ((sampleId, sample) <- samples) {
-      sample.addAndTrackJobs()
-    }
+    if (onlySamples.isEmpty) {
+      samples.foreach { case (sampleId, sample) => sample.addAndTrackJobs() }
+      addMultiSampleJobs()
+    } else onlySamples.foreach(sampleId => samples.get(sampleId) match {
+      case Some(sample) => sample.addAndTrackJobs()
+      case None         => logger.warn("sampleId '" + sampleId + "' not found")
+    })
  }

+  /**
+   * Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given
+   */
+  def addMultiSampleJobs()
+
  /** Stores sample state */
  private var currentSample: Option[String] = None


--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala
@@ -43,7 +43,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
  override val defaultThreads = 8

  var sam: Boolean = config("sam", default = true)
-  var sam_RG: String = config("sam-RG")
+  var sam_RG: Option[String] = config("sam-RG")
  var seedlen: Option[Int] = config("seedlen")
  var seedmms: Option[Int] = config("seedmms")
  var k: Option[Int] = config("k")

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
@@ -43,8 +43,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction {
  if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString

  var opt_discard: Boolean = config("discard", default = false)
-  var opt_minimum_length: String = config("minimum_length", 1)
-  var opt_maximum_length: String = config("maximum_length")
+  var opt_minimum_length: Option[Int] = config("minimum_length", 1)
+  var opt_maximum_length: Option[Int] = config("maximum_length")

  def cmdLine = required(executable) +
    // options

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/PythonCommandLineFunction.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/PythonCommandLineFunction.scala
@@ -28,7 +28,14 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
  executable = config("exe", default = "python", submodule = "python")

  protected var python_script_name: String = _
-  def setPythonScript(script: String) { setPythonScript(script, "") }
+  def setPythonScript(script: String) {
+    python_script = new File(script)
+    if (!python_script.exists()) {
+      setPythonScript(script, "")
+    } else {
+      python_script_name = script
+    }
+  }
  def setPythonScript(script: String, subpackage: String) {
    python_script_name = script
    python_script = new File(".queue/tmp/" + subpackage + python_script_name)

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Raxml.scala
@@ -60,11 +60,11 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
  private var out: List[File] = Nil

  var executableNonThreads: String = config("exe", default = "raxmlHPC")
-  var executableThreads: String = config("exe_pthreads")
+  var executableThreads: Option[String] = config("exe_pthreads")

  override def afterGraph {
    if (threads == 0) threads = getThreads(defaultThreads)
-    executable = if (threads > 1 && executableThreads != null) executableThreads else executableNonThreads
+    executable = if (threads > 1 && executableThreads.isDefined) executableThreads.get else executableNonThreads
    super.afterGraph
    out +:= getInfoFile
    f match {

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/RunGubbins.scala
@@ -24,7 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
 class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction {

  @Input(doc = "Contaminants", required = false)
-  var startingTree: File = config("starting_tree")
+  var startingTree: Option[File] = config("starting_tree")

  @Input(doc = "Fasta file", shortName = "FQ")
  var fastafile: File = _
@@ -36,21 +36,21 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction {
  var outputDirectory: String = _

  executable = config("exe", default = "run_gubbins.py")
-  var outgroup: String = config("outgroup")
-  var filterPercentage: String = config("filter_percentage")
-  var treeBuilder: String = config("tree_builder")
+  var outgroup: Option[String] = config("outgroup")
+  var filterPercentage: Option[String] = config("filter_percentage")
+  var treeBuilder: Option[String] = config("tree_builder")
  var iterations: Option[Int] = config("iterations")
  var minSnps: Option[Int] = config("min_snps")
-  var convergeMethod: String = config("converge_method")
+  var convergeMethod: Option[String] = config("converge_method")
  var useTimeStamp: Boolean = config("use_time_stamp", default = false)
-  var prefix: String = config("prefix")
+  var prefix: Option[String] = config("prefix")
  var verbose: Boolean = config("verbose", default = false)
  var noCleanup: Boolean = config("no_cleanup", default = false)

  override def afterGraph: Unit = {
    super.afterGraph
    jobLocalDir = new File(outputDirectory)
-    if (prefix == null) prefix = fastafile.getName
+    if (prefix.isEmpty) prefix = Some(fastafile.getName)
    val out: List[String] = List(".recombination_predictions.embl",
      ".recombination_predictions.gff",
      ".branch_base_reconstruction.embl",

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala
@@ -43,7 +43,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction {
  var fastqc: Fastqc = _

  executable = config("exe", default = "sickle", freeVar = false)
-  var qualityType: String = config("qualitytype")
+  var qualityType: Option[String] = config("qualitytype")
  var qualityThreshold: Option[Int] = config("qualityThreshold")
  var lengthThreshold: Option[Int] = config("lengthThreshold")
  var noFiveprime: Boolean = config("noFiveprime", default = false)
@@ -54,7 +54,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction {
  override def versionCommand = executable + " --version"

  override def afterGraph {
-    if (qualityType == null && defaultQualityType != null) qualityType = defaultQualityType
+    if (qualityType.isEmpty) qualityType = Some(defaultQualityType)
  }

  def cmdLine = {

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/WigToBigWig.scala
+package nl.lumc.sasc.biopet.extensions
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
+import nl.lumc.sasc.biopet.core.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
+
+/**
+ * Created by pjvan_thof on 1/29/15.
+ * Versions from the executable are not reliable, this extension is based on md5 '3d033ff8a1f4ea9cb3ede12939561141' from the executable
+ */
+class WigToBigWig(val root: Configurable) extends BiopetCommandLineFunction {
+  @Input(doc = "Input wig file")
+  var inputWigFile: File = _
+
+  @Input(doc = "Input chrom sizes file")
+  var inputChromSizesFile: File = _
+
+  @Output(doc = "Output BigWig file")
+  var outputBigWig: File = _
+
+  executable = config("exe", default = "wigToBigWig")
+
+  var blockSize: Option[Int] = config("blockSize")
+  var itemsPerSlot: Option[Int] = config("itemsPerSlot")
+  var clip: Boolean = config("clip", default = false)
+  var unc: Boolean = config("unc", default = false)
+
+  def cmdLine = required(executable) +
+    optional("-blockSize=", blockSize, spaceSeparated = false) +
+    optional("-itemsPerSlot=", itemsPerSlot, spaceSeparated = false) +
+    conditional(clip, "-clip") +
+    conditional(unc, "-unc") +
+    required(inputWigFile) +
+    required(inputChromSizesFile) +
+    required(outputBigWig)
+}
\ No newline at end of file
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaMem.scala
@@ -34,7 +34,7 @@ class BwaMem(val root: Configurable) extends Bwa {
  @Output(doc = "Output file SAM", shortName = "output")
  var output: File = _

-  var R: String = config("R")
+  var R: Option[String] = config("R")
  var k: Option[Int] = config("k")
  var r: Option[Float] = config("r")
  var S: Boolean = config("S", default = false)
@@ -49,11 +49,11 @@ class BwaMem(val root: Configurable) extends Bwa {
  var e: Boolean = config("e", default = false)
  var A: Option[Int] = config("A")
  var B: Option[Int] = config("B")
-  var O: String = config("O")
-  var E: String = config("E")
-  var L: String = config("L")
+  var O: Option[String] = config("O")
+  var E: Option[String] = config("E")
+  var L: Option[String] = config("L")
  var U: Option[Int] = config("U")
-  var x: String = config("x")
+  var x: Option[String] = config("x")
  var p: Boolean = config("p", default = false)
  var v: Option[Int] = config("v")
  var T: Option[Int] = config("T")
@@ -61,7 +61,7 @@ class BwaMem(val root: Configurable) extends Bwa {
  var a: Boolean = config("a", default = false)
  var C: Boolean = config("C", default = false)
  var Y: Boolean = config("Y", default = false)
-  var I: String = config("I")
+  var I: Option[String] = config("I")

  override val defaultVmem = "6G"
  override val defaultThreads = 8

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaSampe.scala
@@ -6,7 +6,11 @@ import nl.lumc.sasc.biopet.core.config.Configurable
 import org.broadinstitute.gatk.utils.commandline.{ Output, Input }

 /**
- * Created by pjvan_thof on 1/16/15.
+ * BWA sampe wrapper
+ *
+ * based on executable version 0.7.10-r789
+ *
+ * @param root Configurable
 */
 class BwaSampe(val root: Configurable) extends Bwa {
  @Input(doc = "Fastq file R1", required = true)
@@ -39,11 +43,14 @@ class BwaSampe(val root: Configurable) extends Bwa {
  var r: String = _

  def cmdLine = required(executable) +
-    required("samse") +
+    required("sampe") +
+    optional("-a", a) +
+    optional("-o", o) +
    optional("-n", n) +
+    optional("-N", N) +
+    optional("-c", c) +
    optional("-f", output) +
    optional("-r", r) +
-    optional("-c", c) +
    conditional(P, "-P") +
    conditional(s, "-s") +
    conditional(A, "-A") +

--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/conifer/Conifer.scala
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.conifer
+
+import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
+import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
+
+abstract class Conifer extends PythonCommandLineFunction {
+  override def subPath = "conifer" :: super.subPath
+  //  executable = config("exe", default = "conifer")
+  setPythonScript(config("script", default = "conifer"))
+  override val versionRegex = """(.*)""".r
+  override val versionExitcode = List(0)
+  override def versionCommand = executable + " " + python_script + " --version"
+
+  override val defaultVmem = "8G"
+  override val defaultThreads = 1
+
+  def cmdLine = getPythonCommand
+
+}