diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
index 60c25a5a69820b72a7e5bbd0f17cc8b5f0dac3fe..fb99be4a4f8716c8f96fb6be12d9caa264b249be 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
@@ -24,6 +24,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
 
 import scala.collection.mutable
 import scala.io.Source
+import scala.util.matching.Regex
 
 /**
  * Extension for cutadapt
@@ -163,6 +164,51 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
     (if (outputAsStsout) "" else required("--output", fastqOutput) +
       " > " + required(statsOutput))
 
+  def extractClippedAdapters(statsOutput: File): Map[String, Any] = {
+    val histoCountRow: Regex = """([\d]+)\t([\d]+)\t.*""".r
+    val adapterR = """Sequence: ([C|T|A|G]+);.*Trimmed: ([\d]+) times\.""".r
+
+    val statsFile = Source.fromFile(statsOutput)
+    val adapterRawStats: Array[String] = statsFile.mkString
+      .split("=== Adapter [\\d]+ ===")
+      .filter(_.contains("Sequence")
+      )
+    statsFile.close()
+
+    adapterRawStats.map(adapter => {
+      var adapterName = ""
+      var adapterCount = 0
+      // identify the adapter name and count
+      for (line <- adapter.split("\n")) {
+        line match {
+          case adapterR(adapter, count) => {
+            adapterName = adapter
+            adapterCount = count.toInt
+          }
+          case _ =>
+        }
+      }
+
+      // parse the histogram of removed-sequence lengths for each read end (5' or 3')
+      val counts = adapter.split("Overview of removed sequences ")
+        .filter(x => x.contains("length"))
+        .map(clipSideRawStats => {
+          val clipSideLabel = if (clipSideRawStats.contains("5'")) { "5p" } else { "3p" }
+
+          val histogramValues = clipSideRawStats.split("\n").flatMap({
+            case histoCountRow(length, count) => Some(length.toInt -> count.toInt)
+            case _                            => None
+          })
+          clipSideLabel -> histogramValues.toMap
+        })
+
+      adapterName -> Map(
+        "count" -> adapterCount,
+        "histogram" -> counts.toMap
+      )
+    }).toMap // convert the Array of (adapterName -> stats) pairs into a Map
+  }
+
   /** Output summary stats */
   def summaryStats: Map[String, Any] = {
     /**
@@ -177,7 +223,6 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
     val tooLongR = """.* that were too long: *([,\d]+) .*""".r
 
     val tooManyN = """.* with too many N: *([,\d]+) .*""".r
-    val adapterR = """Sequence ([C|T|A|G]*);.*Trimmed: ([,\d]+) times.""".r
 
     val basePairsProcessed = """Total basepairs processed: *([,\d]+) bp""".r
     val basePairsWritten = """Total written \(filtered\): *([,\d]+) bp .*""".r
@@ -192,24 +237,28 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
       "bpoutput" -> 0,
       "toomanyn" -> 0
     )
-    val adapterStats: mutable.Map[String, Long] = mutable.Map()
+
+    // extract each adapter together with its clipping histogram
+    val adapterStats = if (statsOutput.exists) {
+      extractClippedAdapters(statsOutput)
+    } else Map.empty
 
     if (statsOutput.exists) {
       val statsFile = Source.fromFile(statsOutput)
       for (line <- statsFile.getLines()) {
         line match {
-          case processedReads(m)        => stats("processed") = m.replaceAll(",", "").toLong
-          case withAdapters(m)          => stats("withadapters") = m.replaceAll(",", "").toLong
-          case readsPassingFilters(m)   => stats("passingfilters") = m.replaceAll(",", "").toLong
-          case tooShortR(m)             => stats("tooshort") = m.replaceAll(",", "").toLong
-          case tooLongR(m)              => stats("toolong") = m.replaceAll(",", "").toLong
-          case tooManyN(m)              => stats("toomanyn") = m.replaceAll(",", "").toLong
-          case basePairsProcessed(m)    => stats("bpinput") = m.replaceAll(",", "").toLong
-          case basePairsWritten(m)      => stats("bpoutput") = m.replaceAll(",", "").toLong
-          case adapterR(adapter, count) => adapterStats += (adapter -> count.toLong)
-          case _                        =>
+          case processedReads(m)      => stats("processed") = m.replaceAll(",", "").toLong
+          case withAdapters(m)        => stats("withadapters") = m.replaceAll(",", "").toLong
+          case readsPassingFilters(m) => stats("passingfilters") = m.replaceAll(",", "").toLong
+          case tooShortR(m)           => stats("tooshort") = m.replaceAll(",", "").toLong
+          case tooLongR(m)            => stats("toolong") = m.replaceAll(",", "").toLong
+          case tooManyN(m)            => stats("toomanyn") = m.replaceAll(",", "").toLong
+          case basePairsProcessed(m)  => stats("bpinput") = m.replaceAll(",", "").toLong
+          case basePairsWritten(m)    => stats("bpoutput") = m.replaceAll(",", "").toLong
+          case _                      =>
         }
       }
+      statsFile.close()
     }
 
     val cleanReads = stats("processed") - stats("withadapters")
@@ -223,8 +272,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
       "num_reads_discarded_too_long" -> stats("toolong"),
       "num_reads_discarded_many_n" -> stats("toomanyn"),
       "num_bases_input" -> stats("bpinput"),
-      "num_based_output" -> stats("bpoutput"),
-      adaptersStatsName -> adapterStats.toMap
+      "num_bases_output" -> stats("bpoutput"),
+      adaptersStatsName -> adapterStats
     )
   }
 
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
index 50c15dbabc3b51be862c5a3987704ff10a57106d..20b4ae8422936bfc5a8b3b8310074b01b46445a6 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala
@@ -156,9 +156,9 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
   override def beforeGraph(): Unit = {
     super.beforeGraph()
     if (!cache && !database) {
-      Logging.addError("Must supply either cache or database for VariantEffectPredictor")
+      Logging.addError("Must either set 'cache' or 'database' to true for VariantEffectPredictor")
     } else if (cache && dir.isEmpty) {
-      Logging.addError("Must supply dir to cache for VariantEffectPredictor")
+      Logging.addError("Must supply 'dir_cache' to cache for VariantEffectPredictor")
     }
     if (statsText) _summary = new File(output.getAbsolutePath + "_summary.txt")
   }
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala
similarity index 98%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala
index b501d47de6cb0899d8e02ae8a4372fd50de4f7e1..c0740c64008c0d887a879e445c08e41dc1a96cd6 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/AnalyzeCovariates.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala
@@ -1,9 +1,9 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
+import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
 
 class AnalyzeCovariates(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala
similarity index 96%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala
index a84aa4b4b8728a1a5c7bbab442a4d905b626821a..b3be8d8578a2a0563274bfc88fa5cf8eb14df543 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ApplyRecalibration.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala
@@ -1,15 +1,12 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
-import org.broadinstitute.gatk.utils.commandline.Argument
-import org.broadinstitute.gatk.utils.commandline.Gather
-import org.broadinstitute.gatk.utils.commandline.Input
-import org.broadinstitute.gatk.utils.commandline.Output
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output }
 
 class ApplyRecalibration(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
   def analysis_type = "ApplyRecalibration"
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BamGatherFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala
similarity index 95%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BamGatherFunction.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala
index eb821f0e47ec6c5b3444cb8bed7172ae6dc906eb..c7a55537e2750e1cb316dfe51d63f9874faa6a72 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BamGatherFunction.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BamGatherFunction.scala
@@ -1,9 +1,8 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
-
-import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
 import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
+import org.broadinstitute.gatk.queue.function.scattergather.GatherFunction
 
 /**
  * Merges BAM files using htsjdk.samtools.MergeSamFiles.
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala
similarity index 98%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala
index 828beeb64ea6396437fd195dbdb4718e2417f24a..7e5bbfd3ff4645d11bd2ee173005c713bf0f2458 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/BaseRecalibrator.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala
@@ -1,11 +1,10 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.{ TaggedFile }
-import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
 
 //TODO: check gathering
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala
index ceceed5f64ba51a75b098e4cd1b18beaa4f1894d..4d712a8407abb8f09b8e6e7fdcceaba8d11d2bb4 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala
@@ -1,58 +1,56 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
 package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction }
+import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output }
 
-class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference {
+class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction {
+  analysisName = "CatVariants"
+  javaMainClass = "org.broadinstitute.gatk.tools.CatVariants"
 
-  javaMainClass = classOf[org.broadinstitute.gatk.tools.CatVariants].getName
+  /** genome reference file <name>.fasta */
+  @Input(fullName = "reference", shortName = "R", doc = "genome reference file <name>.fasta", required = true, exclusiveOf = "", validation = "")
+  var reference: File = _
 
-  @Input(required = true)
-  var inputFiles: List[File] = Nil
+  /** Input VCF file/s */
+  @Input(fullName = "variant", shortName = "V", doc = "Input VCF file/s", required = true, exclusiveOf = "", validation = "")
+  var variant: Seq[File] = Nil
 
-  @Output(required = true)
-  var outputFile: File = null
+  /** output file */
+  @Output(fullName = "outputFile", shortName = "out", doc = "output file", required = true, exclusiveOf = "", validation = "")
+  @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
+  var outputFile: File = _
 
-  @Input
-  var reference: File = null
+  /** assumeSorted should be true if the input files are already sorted (based on the position of the variants) */
+  @Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if the input files are already sorted (based on the position of the variants)", required = false, exclusiveOf = "", validation = "")
+  var assumeSorted: Boolean = _
 
-  var assumeSorted = false
+  /** which type of IndexCreator to use for VCF/BCF indices */
+  @Argument(fullName = "variant_index_type", shortName = "", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "")
+  var variant_index_type: Option[String] = None
 
-  override def beforeGraph(): Unit = {
-    super.beforeGraph()
-    if (reference == null) reference = referenceFasta()
-  }
+  /** the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator */
+  @Argument(fullName = "variant_index_parameter", shortName = "", doc = "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator", required = false, exclusiveOf = "", validation = "")
+  var variant_index_parameter: Option[Int] = None
+
+  /** Set the minimum level of logging */
+  @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "")
+  var logging_level: String = _
+
+  /** Set the logging location */
+  @Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "")
+  @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
+  var log_to_file: File = _
 
   override def cmdLine = super.cmdLine +
-    repeat("-V", inputFiles) +
-    required("-out", outputFile) +
-    required("-R", reference) +
-    conditional(assumeSorted, "--assumeSorted")
+    required("-R", reference, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") +
+    required("-out", outputFile, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(assumeSorted, "-assumeSorted", escape = true, format = "%s") +
+    optional("--variant_index_type", variant_index_type, spaceSeparated = true, escape = true, format = "%s") +
+    optional("--variant_index_parameter", variant_index_parameter, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s")
 }
-
-object CatVariants {
-  def apply(root: Configurable, input: List[File], output: File): CatVariants = {
-    val cv = new CatVariants(root)
-    cv.inputFiles = input
-    cv.outputFile = output
-    cv
-  }
-}
\ No newline at end of file
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariantsGather.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala
similarity index 96%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariantsGather.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala
index e97d6affbf023455122c49b3df59900211df0bea..27c6cb7cadb59997c7dd0c2039e1f53c74f4f8e7 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariantsGather.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariantsGather.scala
@@ -1,4 +1,4 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
 import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala
index e20331fe1b1c670acff8d1d1ca743585a8a0735f..ed0065e589e17000a4e4fb9742d4bca71f6622a1 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineGVCFs.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala
@@ -1,11 +1,11 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ }
 
 class CombineGVCFs(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala
index 343e2d769656dd6800d4cd552f51aa25cec7d28c..ba1740d3518e1ea6dbc17a9c60b0d6aa59abc9df 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala
@@ -1,80 +1,128 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
 package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
+import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output }
-
-/**
- * Extension for CombineVariants from GATK
- *
- * Created by pjvan_thof on 2/26/15.
- *
- * @deprecated
- */
-class CombineVariants(val root: Configurable) extends Gatk {
-  val analysisType = "CombineVariants"
-
-  @Input(doc = "", required = true)
-  var inputFiles: List[File] = Nil
-
-  @Output(doc = "", required = true)
-  var outputFile: File = null
-
-  var setKey: String = null
-  var rodPriorityList: String = null
-  var minimumN: Int = config("minimumN", default = 1)
-  var genotypeMergeOptions: Option[String] = config("genotypeMergeOptions")
-  var excludeNonVariants: Boolean = false
-
-  var inputMap: Map[File, String] = Map()
-
-  def addInput(file: File, name: String): Unit = {
-    inputFiles :+= file
-    inputMap += file -> name
-  }
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
+
+class CombineVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
+  def analysis_type = "CombineVariants"
+  scatterClass = classOf[LocusScatterFunction]
+  setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
+
+  /** VCF files to merge together */
+  @Input(fullName = "variant", shortName = "V", doc = "VCF files to merge together", required = true, exclusiveOf = "", validation = "")
+  var variant: Seq[File] = Nil
+
+  /** File to which variants should be written */
+  @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
+  @Gather(classOf[CatVariantsGatherer])
+  var out: File = _
+
+  /** Determines how we should merge genotype records for samples shared across the ROD files */
+  @Argument(fullName = "genotypemergeoption", shortName = "genotypeMergeOptions", doc = "Determines how we should merge genotype records for samples shared across the ROD files", required = false, exclusiveOf = "", validation = "")
+  var genotypemergeoption: Option[String] = config("genotypemergeoption")
+
+  /** Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields */
+  @Argument(fullName = "filteredrecordsmergetype", shortName = "filteredRecordsMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required = false, exclusiveOf = "", validation = "")
+  var filteredrecordsmergetype: Option[String] = config("filteredrecordsmergetype")
+
+  /** Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel) */
+  @Argument(fullName = "multipleallelesmergetype", shortName = "multipleAllelesMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required = false, exclusiveOf = "", validation = "")
+  var multipleallelesmergetype: Option[String] = config("multipleallelesmergetype")
+
+  /** Ordered list specifying priority for merging */
+  @Argument(fullName = "rod_priority_list", shortName = "priority", doc = "Ordered list specifying priority for merging", required = false, exclusiveOf = "", validation = "")
+  var rod_priority_list: Option[String] = config("rod_priority_list")
+
+  /** Emit interesting sites requiring complex compatibility merging to file */
+  @Argument(fullName = "printComplexMerges", shortName = "printComplexMerges", doc = "Emit interesting sites requiring complex compatibility merging to file", required = false, exclusiveOf = "", validation = "")
+  var printComplexMerges: Boolean = config("printComplexMerges", default = false)
+
+  /** Treat filtered variants as uncalled */
+  @Argument(fullName = "filteredAreUncalled", shortName = "filteredAreUncalled", doc = "Treat filtered variants as uncalled", required = false, exclusiveOf = "", validation = "")
+  var filteredAreUncalled: Boolean = config("filteredAreUncalled", default = false)
+
+  /** Emit a sites-only file */
+  @Argument(fullName = "minimalVCF", shortName = "minimalVCF", doc = "Emit a sites-only file", required = false, exclusiveOf = "", validation = "")
+  var minimalVCF: Boolean = config("minimalVCF", default = false)
+
+  /** Exclude sites where no variation is present after merging */
+  @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Exclude sites where no variation is present after merging", required = false, exclusiveOf = "", validation = "")
+  var excludeNonVariants: Boolean = config("excludeNonVariants", default = false)
+
+  /** Key name for the set attribute */
+  @Argument(fullName = "setKey", shortName = "setKey", doc = "Key name for the set attribute", required = false, exclusiveOf = "", validation = "")
+  var setKey: Option[String] = config("set_key")
+
+  /** Assume input VCFs have identical sample sets and disjoint calls */
+  @Argument(fullName = "assumeIdenticalSamples", shortName = "assumeIdenticalSamples", doc = "Assume input VCFs have identical sample sets and disjoint calls", required = false, exclusiveOf = "", validation = "")
+  var assumeIdenticalSamples: Boolean = config("assumeIdenticalSamples", default = false)
+
+  /** Minimum number of input files the site must be observed in to be included */
+  @Argument(fullName = "minimumN", shortName = "minN", doc = "Minimum number of input files the site must be observed in to be included", required = false, exclusiveOf = "", validation = "")
+  var minimumN: Option[Int] = config("minimumN")
+
+  /** Do not output the command line to the header */
+  @Argument(fullName = "suppressCommandLineHeader", shortName = "suppressCommandLineHeader", doc = "Do not output the command line to the header", required = false, exclusiveOf = "", validation = "")
+  var suppressCommandLineHeader: Boolean = config("suppressCommandLineHeader", default = false)
+
+  /** Use the INFO content of the record with the highest AC */
+  @Argument(fullName = "mergeInfoWithMaxAC", shortName = "mergeInfoWithMaxAC", doc = "Use the INFO content of the record with the highest AC", required = false, exclusiveOf = "", validation = "")
+  var mergeInfoWithMaxAC: Boolean = config("mergeInfoWithMaxAC", default = false)
+
+  /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
+  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
+
+  /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
+  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
+
+  /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
+  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
 
   @Output
   @Gather(enabled = false)
   private var outputIndex: File = _
 
-  override def beforeGraph(): Unit = {
+  override def beforeGraph() {
     super.beforeGraph()
-    outputIndex = VcfUtils.getVcfIndexFile(outputFile)
-    genotypeMergeOptions match {
-      case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None =>
-      case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions")
-    }
-    deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
-    deps = deps.distinct
+    deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig))
+    if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out))
+      outputIndex = VcfUtils.getVcfIndexFile(out)
   }
 
   override def cmdLine = super.cmdLine +
-    (for (file <- inputFiles) yield {
-      inputMap.get(file) match {
-        case Some(name) => required("-V:" + name, file)
-        case _          => required("-V", file)
-      }
-    }).mkString +
-    required("-o", outputFile) +
-    optional("--setKey", setKey) +
-    optional("--rod_priority_list", rodPriorityList) +
-    optional("-genotypeMergeOptions", genotypeMergeOptions) +
-    conditional(excludeNonVariants, "--excludeNonVariants")
+    repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-o", out, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-genotypeMergeOptions", genotypemergeoption, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-filteredRecordsMergeType", filteredrecordsmergetype, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-multipleAllelesMergeType", multipleallelesmergetype, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-priority", rod_priority_list, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(printComplexMerges, "-printComplexMerges", escape = true, format = "%s") +
+    conditional(filteredAreUncalled, "-filteredAreUncalled", escape = true, format = "%s") +
+    conditional(minimalVCF, "-minimalVCF", escape = true, format = "%s") +
+    conditional(excludeNonVariants, "-env", escape = true, format = "%s") +
+    optional("-setKey", setKey, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(assumeIdenticalSamples, "-assumeIdenticalSamples", escape = true, format = "%s") +
+    optional("-minN", minimumN, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(suppressCommandLineHeader, "-suppressCommandLineHeader", escape = true, format = "%s") +
+    conditional(mergeInfoWithMaxAC, "-mergeInfoWithMaxAC", escape = true, format = "%s") +
+    conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") +
+    conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") +
+    conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
+}
+
+object CombineVariants {
+  def apply(root: Configurable, input: List[File], output: File): CombineVariants = {
+    val cv = new CombineVariants(root)
+    cv.variant = input
+    cv.out = output
+    cv
+  }
 }
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CommandLineGATK.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CommandLineGATK.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala
index 3e8091437d7bfe377748a827fef43655cf5b65b4..7fbba210ce42d99573c3dfd127a45ab9346f438a 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CommandLineGATK.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CommandLineGATK.scala
@@ -1,4 +1,4 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ContigScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala
similarity index 88%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ContigScatterFunction.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala
index 12350d3ad5d891e410f15dc90662645c3ceb2319..abfc807c26b7a88623b4b3ec649b010cb3d5d526 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/ContigScatterFunction.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ContigScatterFunction.scala
@@ -1,8 +1,9 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
-import collection.JavaConversions._
-import org.broadinstitute.gatk.utils.interval.IntervalUtils
 import org.broadinstitute.gatk.queue.function.InProcessFunction
+import org.broadinstitute.gatk.utils.interval.IntervalUtils
+
+import scala.collection.JavaConversions._
 
 /**
  * Splits intervals by contig instead of evenly.
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala
deleted file mode 100644
index 92ca40e02d94e4935f3f1c031cf1371e4b77b8a2..0000000000000000000000000000000000000000
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/Gatk.scala
+++ /dev/null
@@ -1,76 +0,0 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
-package nl.lumc.sasc.biopet.extensions.gatk
-
-import java.io.File
-
-import nl.lumc.sasc.biopet.core.{ Version, BiopetJavaCommandLineFunction, Reference }
-import org.broadinstitute.gatk.utils.commandline.Input
-
-/**
- * General extension for GATK module
- *
- * Created by pjvan_thof on 2/26/15.
- *
- * @deprecated
- */
-abstract class Gatk extends BiopetJavaCommandLineFunction with Reference with Version {
-  override def subPath = "gatk" :: super.subPath
-
-  jarFile = config("gatk_jar")
-
-  val analysisType: String
-
-  override def defaultCoreMemory = 3.0
-
-  @Input(required = true)
-  var reference: File = null
-
-  @Input(required = false)
-  var gatkKey: Option[File] = config("gatk_key")
-
-  @Input(required = false)
-  var intervals: List[File] = config("intervals", default = Nil)
-
-  @Input(required = false)
-  var excludeIntervals: List[File] = config("exclude_intervals", default = Nil)
-
-  @Input(required = false)
-  var pedigree: List[File] = config("pedigree", default = Nil)
-
-  var et: Option[String] = config("et")
-
-  def versionRegex = """(.*)""".r
-  override def versionExitcode = List(0, 1)
-  def versionCommand = executable + " -jar " + jarFile + " -version"
-
-  override def getVersion = super.getVersion.collect { case version => "Gatk " + version }
-  override def dictRequired = true
-
-  override def beforeGraph(): Unit = {
-    super.beforeGraph()
-    if (reference == null) reference = referenceFasta()
-  }
-
-  override def cmdLine = super.cmdLine +
-    required("-T", analysisType) +
-    required("-R", reference) +
-    optional("-K", gatkKey) +
-    optional("-et", et) +
-    repeat("-L", intervals) +
-    repeat("-XL", excludeIntervals) +
-    repeat("-ped", pedigree)
-}
\ No newline at end of file
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala
similarity index 97%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkScatterFunction.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala
index f2399b946ca51488bc102b90c9b96f4368f91147..d49b4d34372dfebdd5eafb6469d643e3067a8d4b 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkScatterFunction.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkScatterFunction.scala
@@ -1,12 +1,12 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
-import org.broadinstitute.gatk.utils.interval.IntervalUtils
 import java.io.File
 
 import org.broadinstitute.gatk.queue.extensions.gatk.GATKIntervals
-import org.broadinstitute.gatk.utils.io.IOUtils
 import org.broadinstitute.gatk.queue.function.scattergather.{ CloneFunction, ScatterFunction }
-import org.broadinstitute.gatk.utils.commandline.{ Output, _ }
+import org.broadinstitute.gatk.utils.commandline.Output
+import org.broadinstitute.gatk.utils.interval.IntervalUtils
+import org.broadinstitute.gatk.utils.io.IOUtils
 
 trait GATKScatterFunction extends ScatterFunction {
   /* The runtime field to set for specifying intervals. */
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala
index 62d2457de0431b8c28ddb3e2eae58352a4b39b23..aea609fb98b311748e98cd044cf0b454a88bfe94 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeConcordance.scala
@@ -1,52 +1,70 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
 package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
+import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.core.summary.Summarizable
+import nl.lumc.sasc.biopet.utils.VcfUtils
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
-import org.broadinstitute.gatk.utils.report.{ GATKReportTable, GATKReport }
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
+import org.broadinstitute.gatk.utils.report.{ GATKReport, GATKReportTable }
 
-/**
- * Extension for CombineVariants from GATK
- *
- * Created by pjvan_thof on 2/26/15.
- *
- * @deprecated
- */
-class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable {
-  val analysisType = "GenotypeConcordance"
+class GenotypeConcordance(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction with Summarizable {
+  analysisName = "GenotypeConcordance"
+  val analysis_type = "GenotypeConcordance"
+  scatterClass = classOf[LocusScatterFunction]
+  setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
 
-  @Input(required = true)
-  var evalFile: File = null
+  /** The variants and genotypes to evaluate */
+  @Input(fullName = "eval", shortName = "eval", doc = "The variants and genotypes to evaluate", required = true, exclusiveOf = "", validation = "")
+  var eval: File = _
 
-  @Input(required = true)
-  var compFile: File = null
+  /** The variants and genotypes to compare against */
+  @Input(fullName = "comp", shortName = "comp", doc = "The variants and genotypes to compare against", required = true, exclusiveOf = "", validation = "")
+  var comp: File = _
 
-  @Output(required = true)
-  var outputFile: File = null
+  /** Filters will be ignored */
+  @Argument(fullName = "ignoreFilters", shortName = "", doc = "Filters will be ignored", required = false, exclusiveOf = "", validation = "")
+  var ignoreFilters: Boolean = config("ignoreFilters", default = false)
 
-  var moltenize = true
+  /** One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod. */
+  @Argument(fullName = "genotypeFilterExpressionEval", shortName = "gfe", doc = "One or more criteria to use to set EVAL genotypes to no-call. These genotype-level filters are only applied to the EVAL rod.", required = false, exclusiveOf = "", validation = "")
+  var genotypeFilterExpressionEval: List[String] = config("genotypeFilterExpressionEval", default = Nil)
 
-  def summaryFiles = Map("output" -> outputFile)
+  /** One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod. */
+  @Argument(fullName = "genotypeFilterExpressionComp", shortName = "gfc", doc = "One or more criteria to use to set COMP genotypes to no-call. These genotype-level filters are only applied to the COMP rod.", required = false, exclusiveOf = "", validation = "")
+  var genotypeFilterExpressionComp: Seq[String] = config("genotypeFilterExpressionComp", default = Nil)
+
+  /** Molten rather than tabular output */
+  @Argument(fullName = "moltenize", shortName = "moltenize", doc = "Molten rather than tabular output", required = false, exclusiveOf = "", validation = "")
+  var moltenize: Boolean = config("moltenize", default = true)
+
+  /** File to output the discordant sites and genotypes. */
+  @Output(fullName = "printInterestingSites", shortName = "sites", doc = "File to output the discordant sites and genotypes.", required = false, exclusiveOf = "", validation = "")
+  var printInterestingSites: Option[File] = None
+
+  /** An output file created by the walker.  Will overwrite contents if file exists */
+  @Output(fullName = "out", shortName = "o", doc = "An output file created by the walker.  Will overwrite contents if file exists", required = false, exclusiveOf = "", validation = "")
+  @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
+  var out: File = _
+
+  /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
+  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
+
+  /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
+  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
+
+  /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
+  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
+
+  def summaryFiles = Map("output" -> out)
 
   def summaryStats = {
-    val report = new GATKReport(outputFile)
+    val report = new GATKReport(out)
     val compProportions = report.getTable("GenotypeConcordance_CompProportions")
     val counts = report.getTable("GenotypeConcordance_Counts")
     val evalProportions = report.getTable("GenotypeConcordance_EvalProportions")
@@ -82,15 +100,24 @@ class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable
     )
   }
 
+  /** Adds the files returned by VcfUtils.getVcfIndexFile for `eval` and `comp` to `deps`; an input that is still null is skipped. */
   override def beforeGraph(): Unit = {
     super.beforeGraph()
-    deps :::= (evalFile :: compFile :: Nil).filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
-    deps = deps.distinct
+    if (eval != null) deps :+= VcfUtils.getVcfIndexFile(eval)
+    if (comp != null) deps :+= VcfUtils.getVcfIndexFile(comp)
   }
 
+  /** Extends the inherited command line with the GenotypeConcordance arguments configured on this instance. */
   override def cmdLine = super.cmdLine +
-    required("--eval", evalFile) +
-    required("--comp", compFile) +
-    required("-o", outputFile) +
-    conditional(moltenize, "--moltenize")
+    required(TaggedFile.formatCommandLineParameter("-eval", eval), eval, spaceSeparated = true, escape = true, format = "%s") +
+    required(TaggedFile.formatCommandLineParameter("-comp", comp), comp, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(ignoreFilters, "--ignoreFilters", escape = true, format = "%s") +
+    repeat("-gfe", genotypeFilterExpressionEval, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-gfc", genotypeFilterExpressionComp, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(moltenize, "-moltenize", escape = true, format = "%s") +
+    optional("-sites", printInterestingSites, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-o", out, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") +
+    conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") +
+    conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
 }
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala
index b1a54e34f4b9a079fef323110c2b61b7d9c5ad25..650340d63c1be0ba9195609a616ddddb4abee8ef 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala
@@ -1,11 +1,11 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
 
 class GenotypeGVCFs(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala
index 5ffdcb306b32bcc06152fd3e1a7457715443cee9..9eac2ba9e3d4974e7b479addcb3d0f1dadf5ef56 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala
@@ -1,10 +1,10 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output }
 
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala
index 34b7c58f9af92fdaee68bb710fc800779984e953..7d16d832892a0daaffd999f549f5fd00f327b51c 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala
@@ -1,11 +1,11 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
 
 class IndelRealigner(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/LocusScatterFunction.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala
similarity index 87%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/LocusScatterFunction.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala
index 853d185b37d7cb1838b2c3f2928a91cdb8b82548..6a3b961ecbfb034465909714305e469af63a816a 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/LocusScatterFunction.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/LocusScatterFunction.scala
@@ -1,8 +1,9 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
-import collection.JavaConversions._
-import org.broadinstitute.gatk.utils.interval.IntervalUtils
 import org.broadinstitute.gatk.queue.function.InProcessFunction
+import org.broadinstitute.gatk.utils.interval.IntervalUtils
+
+import scala.collection.JavaConversions._
 
 /**
  * A scatter function that divides down to the locus level.
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala
index 9f18533cf8f78dcf9ebc8f3749112db9e6d6bd4f..6eaca11891f0d5a559cd3723f5ee8852800f9260 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/PrintReads.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala
@@ -1,9 +1,9 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
+import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.utils.commandline._
 
 class PrintReads(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala
similarity index 97%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala
index 383e74fbf0e4055c2fb5ec296b5a4ccaf72ead39..74ce632bd78dba9de535c3f3b4f82fb481976410 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/RealignerTargetCreator.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala
@@ -1,11 +1,11 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.{ TaggedFile }
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, _ }
 
 class RealignerTargetCreator(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
index d98abff1485b59cd0424eff47d03b0d1dbfe585d..a1ed7b732f9b72d1660c9ea8c1995e5fc0137a68 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala
@@ -1,69 +1,262 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project that are
- * not part of GATK Queue is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
 package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
+import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
 import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output }
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
 
-/**
- * Extension for CombineVariants from GATK
- *
- * Created by pjvan_thof on 2/26/15.
- *
- * @deprecated
- */
-class SelectVariants(val root: Configurable) extends Gatk {
-  val analysisType = "SelectVariants"
+class SelectVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
+  def analysis_type = "SelectVariants"
+  scatterClass = classOf[LocusScatterFunction]
+  setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
 
-  @Input(doc = "", required = true)
-  var inputFiles: List[File] = Nil
+  /** Input VCF file */
+  @Input(fullName = "variant", shortName = "V", doc = "Input VCF file", required = true, exclusiveOf = "", validation = "")
+  var variant: File = _
 
-  @Output(doc = "", required = true)
-  var outputFile: File = null
+  /** Output variants not called in this comparison track */
+  @Input(fullName = "discordance", shortName = "disc", doc = "Output variants not called in this comparison track", required = false, exclusiveOf = "", validation = "")
+  var discordance: Option[File] = None
 
-  var excludeNonVariants: Boolean = false
+  /** Output variants also called in this comparison track */
+  @Input(fullName = "concordance", shortName = "conc", doc = "Output variants also called in this comparison track", required = false, exclusiveOf = "", validation = "")
+  var concordance: Option[File] = None
 
-  var inputMap: Map[File, String] = Map()
+  /** File to which variants should be written */
+  @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
+  @Gather(classOf[CatVariantsGatherer])
+  var out: File = _
 
-  def addInput(file: File, name: String): Unit = {
-    inputFiles :+= file
-    inputMap += file -> name
-  }
+  /** Include genotypes from this sample */
+  @Argument(fullName = "sample_name", shortName = "sn", doc = "Include genotypes from this sample", required = false, exclusiveOf = "", validation = "")
+  var sample_name: List[String] = config("sample_name", default = Nil)
+
+  /** Regular expression to select multiple samples */
+  @Argument(fullName = "sample_expressions", shortName = "se", doc = "Regular expression to select multiple samples", required = false, exclusiveOf = "", validation = "")
+  var sample_expressions: List[String] = config("sample_expressions", default = Nil)
+
+  /** File containing a list of samples to include */
+  @Input(fullName = "sample_file", shortName = "sf", doc = "File containing a list of samples to include", required = false, exclusiveOf = "", validation = "")
+  var sample_file: List[File] = config("sample_file", default = Nil)
+
+  /** Exclude genotypes from this sample */
+  @Argument(fullName = "exclude_sample_name", shortName = "xl_sn", doc = "Exclude genotypes from this sample", required = false, exclusiveOf = "", validation = "")
+  var exclude_sample_name: List[String] = config("exclude_sample_name", default = Nil)
+
+  /** List of samples to exclude */
+  @Input(fullName = "exclude_sample_file", shortName = "xl_sf", doc = "List of samples to exclude", required = false, exclusiveOf = "", validation = "")
+  var exclude_sample_file: List[File] = config("exclude_sample_file", default = Nil)
+
+  /** List of sample expressions to exclude */
+  @Input(fullName = "exclude_sample_expressions", shortName = "xl_se", doc = "List of sample expressions to exclude", required = false, exclusiveOf = "", validation = "")
+  var exclude_sample_expressions: List[File] = config("exclude_sample_expressions", default = Nil)
+
+  /** One or more criteria to use when selecting the data */
+  @Argument(fullName = "selectexpressions", shortName = "select", doc = "One or more criteria to use when selecting the data", required = false, exclusiveOf = "", validation = "")
+  var selectexpressions: List[String] = config("selectexpressions", default = Nil)
+
+  /** Invert the selection criteria for -select */
+  @Argument(fullName = "invertselect", shortName = "invertSelect", doc = "Invert the selection criteria for -select", required = false, exclusiveOf = "", validation = "")
+  var invertselect: Boolean = config("invertselect", default = false)
+
+  /** Don't include non-variant sites */
+  @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Don't include non-variant sites", required = false, exclusiveOf = "", validation = "")
+  var excludeNonVariants: Boolean = config("excludeNonVariants", default = false)
+
+  /** Don't include filtered sites */
+  @Argument(fullName = "excludeFiltered", shortName = "ef", doc = "Don't include filtered sites", required = false, exclusiveOf = "", validation = "")
+  var excludeFiltered: Boolean = config("excludeFiltered", default = false)
+
+  /** Preserve original alleles, do not trim */
+  @Argument(fullName = "preserveAlleles", shortName = "noTrim", doc = "Preserve original alleles, do not trim", required = false, exclusiveOf = "", validation = "")
+  var preserveAlleles: Boolean = config("preserveAlleles", default = false)
+
+  /** Remove alternate alleles not present in any genotypes */
+  @Argument(fullName = "removeUnusedAlternates", shortName = "trimAlternates", doc = "Remove alternate alleles not present in any genotypes", required = false, exclusiveOf = "", validation = "")
+  var removeUnusedAlternates: Boolean = config("removeUnusedAlternates", default = false)
+
+  /** Select only variants of a particular allelicity */
+  @Argument(fullName = "restrictAllelesTo", shortName = "restrictAllelesTo", doc = "Select only variants of a particular allelicity", required = false, exclusiveOf = "", validation = "")
+  var restrictAllelesTo: Option[String] = config("restrictAllelesTo")
+
+  /** Store the original AC, AF, and AN values after subsetting */
+  @Argument(fullName = "keepOriginalAC", shortName = "keepOriginalAC", doc = "Store the original AC, AF, and AN values after subsetting", required = false, exclusiveOf = "", validation = "")
+  var keepOriginalAC: Boolean = config("keepOriginalAC", default = false)
+
+  /** Store the original DP value after subsetting */
+  @Argument(fullName = "keepOriginalDP", shortName = "keepOriginalDP", doc = "Store the original DP value after subsetting", required = false, exclusiveOf = "", validation = "")
+  var keepOriginalDP: Boolean = config("keepOriginalDP", default = false)
+
+  /** Output mendelian violation sites only */
+  @Argument(fullName = "mendelianViolation", shortName = "mv", doc = "Output mendelian violation sites only", required = false, exclusiveOf = "", validation = "")
+  var mendelianViolation: Boolean = config("mendelianViolation", default = false)
+
+  /** Output non-mendelian violation sites only */
+  @Argument(fullName = "invertMendelianViolation", shortName = "invMv", doc = "Output non-mendelian violation sites only", required = false, exclusiveOf = "", validation = "")
+  var invertMendelianViolation: Boolean = config("invertMendelianViolation", default = false)
+
+  /** Minimum GQ score for each trio member to accept a site as a violation */
+  @Argument(fullName = "mendelianViolationQualThreshold", shortName = "mvq", doc = "Minimum GQ score for each trio member to accept a site as a violation", required = false, exclusiveOf = "", validation = "")
+  var mendelianViolationQualThreshold: Option[Double] = config("mendelianViolationQualThreshold")
+
+  /** Format string for mendelianViolationQualThreshold */
+  @Argument(fullName = "mendelianViolationQualThresholdFormat", shortName = "", doc = "Format string for mendelianViolationQualThreshold", required = false, exclusiveOf = "", validation = "")
+  var mendelianViolationQualThresholdFormat: String = "%s"
+
+  /** Select a fraction of variants at random from the input */
+  @Argument(fullName = "select_random_fraction", shortName = "fraction", doc = "Select a fraction of variants at random from the input", required = false, exclusiveOf = "", validation = "")
+  var select_random_fraction: Option[Double] = config("select_random_fraction")
+
+  /** Format string for select_random_fraction */
+  @Argument(fullName = "select_random_fractionFormat", shortName = "", doc = "Format string for select_random_fraction", required = false, exclusiveOf = "", validation = "")
+  var select_random_fractionFormat: String = "%s"
+
+  /** Select a fraction of genotypes at random from the input and sets them to no-call */
+  @Argument(fullName = "remove_fraction_genotypes", shortName = "fractionGenotypes", doc = "Select a fraction of genotypes at random from the input and sets them to no-call", required = false, exclusiveOf = "", validation = "")
+  var remove_fraction_genotypes: Option[Double] = config("remove_fraction_genotypes")
+
+  /** Format string for remove_fraction_genotypes */
+  @Argument(fullName = "remove_fraction_genotypesFormat", shortName = "", doc = "Format string for remove_fraction_genotypes", required = false, exclusiveOf = "", validation = "")
+  var remove_fraction_genotypesFormat: String = "%s"
+
+  /** Select only a certain type of variants from the input file */
+  @Argument(fullName = "selectTypeToInclude", shortName = "selectType", doc = "Select only a certain type of variants from the input file", required = false, exclusiveOf = "", validation = "")
+  var selectTypeToInclude: List[String] = config("selectTypeToInclude", default = Nil)
+
+  /** Do not select certain type of variants from the input file */
+  @Argument(fullName = "selectTypeToExclude", shortName = "xlSelectType", doc = "Do not select certain type of variants from the input file", required = false, exclusiveOf = "", validation = "")
+  var selectTypeToExclude: Seq[String] = config("selectTypeToExclude", default = Nil)
+
+  /** List of variant IDs to select */
+  @Input(fullName = "keepIDs", shortName = "IDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "")
+  var keepIDs: Option[File] = config("keepIDs")
+
+  /** List of variant IDs to exclude */
+  @Argument(fullName = "excludeIDs", shortName = "xlIDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "")
+  var excludeIDs: Option[File] = config("excludeIDs")
+
+  /** If true, the incoming VariantContext will be fully decoded */
+  @Argument(fullName = "fullyDecode", shortName = "", doc = "If true, the incoming VariantContext will be fully decoded", required = false, exclusiveOf = "", validation = "")
+  var fullyDecode: Boolean = config("fullyDecode", default = false)
+
+  /** If true, we won't actually write the output file.  For efficiency testing only */
+  @Argument(fullName = "justRead", shortName = "", doc = "If true, we won't actually write the output file.  For efficiency testing only", required = false, exclusiveOf = "", validation = "")
+  var justRead: Boolean = config("justRead", default = false)
+
+  /** Maximum size of indels to include */
+  @Argument(fullName = "maxIndelSize", shortName = "", doc = "Maximum size of indels to include", required = false, exclusiveOf = "", validation = "")
+  var maxIndelSize: Option[Int] = config("maxIndelSize")
+
+  /** Minimum size of indels to include */
+  @Argument(fullName = "minIndelSize", shortName = "", doc = "Minimum size of indels to include", required = false, exclusiveOf = "", validation = "")
+  var minIndelSize: Option[Int] = config("minIndelSize")
+
+  /** Maximum number of samples filtered at the genotype level */
+  @Argument(fullName = "maxFilteredGenotypes", shortName = "", doc = "Maximum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
+  var maxFilteredGenotypes: Option[Int] = config("maxFilteredGenotypes")
+
+  /** Minimum number of samples filtered at the genotype level */
+  @Argument(fullName = "minFilteredGenotypes", shortName = "", doc = "Minimum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
+  var minFilteredGenotypes: Option[Int] = config("minFilteredGenotypes")
+
+  /** Maximum fraction of samples filtered at the genotype level */
+  @Argument(fullName = "maxFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
+  var maxFractionFilteredGenotypes: Option[Double] = config("maxFractionFilteredGenotypes")
+
+  /** Format string for maxFractionFilteredGenotypes */
+  @Argument(fullName = "maxFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for maxFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "")
+  var maxFractionFilteredGenotypesFormat: String = "%s"
+
+  /** Minimum fraction of samples filtered at the genotype level */
+  @Argument(fullName = "minFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
+  var minFractionFilteredGenotypes: Option[Double] = config("minFractionFilteredGenotypes")
+
+  /** Format string for minFractionFilteredGenotypes */
+  @Argument(fullName = "minFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for minFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "")
+  var minFractionFilteredGenotypesFormat: String = "%s"
+
+  /** Set filtered genotypes to no-call */
+  @Argument(fullName = "setFilteredGtToNocall", shortName = "", doc = "Set filtered genotypes to no-call", required = false, exclusiveOf = "", validation = "")
+  var setFilteredGtToNocall: Boolean = config("setFilteredGtToNocall", default = false)
+
+  /** Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored. */
+  @Argument(fullName = "ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", shortName = "", doc = "Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.", required = false, exclusiveOf = "", validation = "")
+  var ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES: Boolean = config("ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", default = false)
+
+  /** Forces output VCF to be compliant to up-to-date version */
+  @Argument(fullName = "forceValidOutput", shortName = "", doc = "Forces output VCF to be compliant to up-to-date version", required = false, exclusiveOf = "", validation = "")
+  var forceValidOutput: Boolean = config("forceValidOutput", default = false)
+
+  /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
+  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
+
+  /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
+  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
+
+  /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
+  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
+  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
 
   @Output
   @Gather(enabled = false)
   private var outputIndex: File = _
 
-  override def beforeGraph(): Unit = {
+  override def beforeGraph() {
     super.beforeGraph()
-    outputIndex = VcfUtils.getVcfIndexFile(outputFile)
-    deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
-    deps = deps.distinct
+    if (variant != null)
+      deps :+= VcfUtils.getVcfIndexFile(variant)
+    discordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_))
+    concordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_))
+    if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out))
+      outputIndex = VcfUtils.getVcfIndexFile(out)
   }
 
   override def cmdLine = super.cmdLine +
-    (for (file <- inputFiles) yield {
-      inputMap.get(file) match {
-        case Some(name) => required("-V:" + name, file)
-        case _          => required("-V", file)
-      }
-    }).mkString +
-    required("-o", outputFile) +
-    conditional(excludeNonVariants, "--excludeNonVariants")
+    required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") +
+    optional(TaggedFile.formatCommandLineParameter("-disc", discordance), discordance, spaceSeparated = true, escape = true, format = "%s") +
+    optional(TaggedFile.formatCommandLineParameter("-conc", concordance), concordance, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-o", out, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-se", sample_expressions, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-sf", sample_file, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-xl_sn", exclude_sample_name, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-xl_sf", exclude_sample_file, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-xl_se", exclude_sample_expressions, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-select", selectexpressions, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(invertselect, "-invertSelect", escape = true, format = "%s") +
+    conditional(excludeNonVariants, "-env", escape = true, format = "%s") +
+    conditional(excludeFiltered, "-ef", escape = true, format = "%s") +
+    conditional(preserveAlleles, "-noTrim", escape = true, format = "%s") +
+    conditional(removeUnusedAlternates, "-trimAlternates", escape = true, format = "%s") +
+    optional("-restrictAllelesTo", restrictAllelesTo, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(keepOriginalAC, "-keepOriginalAC", escape = true, format = "%s") +
+    conditional(keepOriginalDP, "-keepOriginalDP", escape = true, format = "%s") +
+    conditional(mendelianViolation, "-mv", escape = true, format = "%s") +
+    conditional(invertMendelianViolation, "-invMv", escape = true, format = "%s") +
+    optional("-mvq", mendelianViolationQualThreshold, spaceSeparated = true, escape = true, format = mendelianViolationQualThresholdFormat) +
+    optional("-fraction", select_random_fraction, spaceSeparated = true, escape = true, format = select_random_fractionFormat) +
+    optional("-fractionGenotypes", remove_fraction_genotypes, spaceSeparated = true, escape = true, format = remove_fraction_genotypesFormat) +
+    repeat("-selectType", selectTypeToInclude, spaceSeparated = true, escape = true, format = "%s") +
+    repeat("-xlSelectType", selectTypeToExclude, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-IDs", keepIDs, spaceSeparated = true, escape = true, format = "%s") +
+    optional("-xlIDs", excludeIDs, spaceSeparated = true, escape = true, format = "%s") +
+    conditional(fullyDecode, "--fullyDecode", escape = true, format = "%s") +
+    conditional(justRead, "--justRead", escape = true, format = "%s") +
+    optional("--maxIndelSize", maxIndelSize, spaceSeparated = true, escape = true, format = "%s") +
+    optional("--minIndelSize", minIndelSize, spaceSeparated = true, escape = true, format = "%s") +
+    optional("--maxFilteredGenotypes", maxFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") +
+    optional("--minFilteredGenotypes", minFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") +
+    optional("--maxFractionFilteredGenotypes", maxFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = maxFractionFilteredGenotypesFormat) +
+    optional("--minFractionFilteredGenotypes", minFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = minFractionFilteredGenotypesFormat) +
+    conditional(setFilteredGtToNocall, "--setFilteredGtToNocall", escape = true, format = "%s") +
+    conditional(ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES, "--ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", escape = true, format = "%s") +
+    conditional(forceValidOutput, "--forceValidOutput", escape = true, format = "%s") +
+    conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") +
+    conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") +
+    conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
 }
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala
index fbb4423898d3f0c688a71cce40a2f1ad2b632978..0edfe5260fe2fb9101fd92d01fbcebce94ba0441 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala
@@ -1,11 +1,11 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
 import nl.lumc.sasc.biopet.utils.VcfUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ }
 
 class UnifiedGenotyper(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala
index 7fa034cfe6c48b3e4498a7ee4f968404a157a88c..d98a55a49eb6e34c00588fddde66dc00cddfe610 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantAnnotator.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala
@@ -1,4 +1,4 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala
similarity index 98%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala
index a595ce5c1494101ac9104220db3a2c28d3120d2a..cf1c362c7dd06b2eec25104fdfd3fb5a06dea2f8 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantEval.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala
@@ -1,13 +1,11 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
+
+import java.io.File
 
 import nl.lumc.sasc.biopet.utils.VcfUtils
 import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
-import java.io.File
-import org.broadinstitute.gatk.utils.commandline.Argument
-import org.broadinstitute.gatk.utils.commandline.Gather
-import org.broadinstitute.gatk.utils.commandline.Input
-import org.broadinstitute.gatk.utils.commandline.Output
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output }
 
 class VariantEval(val root: Configurable) extends CommandLineGATK {
   def analysis_type = "VariantEval"
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala
similarity index 99%
rename from biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala
rename to biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala
index db631449a9e6c59112832247367634ee8d256983..96b5ee4c00fc1350e6298f31d11c2308010b6e23 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/VariantRecalibrator.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala
@@ -1,4 +1,4 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
+package nl.lumc.sasc.biopet.extensions.gatk
 
 import java.io.File
 
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariants.scala
deleted file mode 100644
index 6a23df5ff8d1f1a66c7606e826d7a09ad8924cc5..0000000000000000000000000000000000000000
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CatVariants.scala
+++ /dev/null
@@ -1,59 +0,0 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
-
-import java.io.File
-
-import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.utils.commandline.Argument
-import org.broadinstitute.gatk.utils.commandline.Gather
-import org.broadinstitute.gatk.utils.commandline.Input
-import org.broadinstitute.gatk.utils.commandline.Output
-
-class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction {
-  analysisName = "CatVariants"
-  javaMainClass = "org.broadinstitute.gatk.tools.CatVariants"
-
-  /** genome reference file <name>.fasta */
-  @Input(fullName = "reference", shortName = "R", doc = "genome reference file <name>.fasta", required = true, exclusiveOf = "", validation = "")
-  var reference: File = _
-
-  /** Input VCF file/s */
-  @Input(fullName = "variant", shortName = "V", doc = "Input VCF file/s", required = true, exclusiveOf = "", validation = "")
-  var variant: Seq[File] = Nil
-
-  /** output file */
-  @Output(fullName = "outputFile", shortName = "out", doc = "output file", required = true, exclusiveOf = "", validation = "")
-  @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
-  var outputFile: File = _
-
-  /** assumeSorted should be true if the input files are already sorted (based on the position of the variants) */
-  @Argument(fullName = "assumeSorted", shortName = "assumeSorted", doc = "assumeSorted should be true if the input files are already sorted (based on the position of the variants)", required = false, exclusiveOf = "", validation = "")
-  var assumeSorted: Boolean = _
-
-  /** which type of IndexCreator to use for VCF/BCF indices */
-  @Argument(fullName = "variant_index_type", shortName = "", doc = "which type of IndexCreator to use for VCF/BCF indices", required = false, exclusiveOf = "", validation = "")
-  var variant_index_type: Option[String] = None
-
-  /** the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator */
-  @Argument(fullName = "variant_index_parameter", shortName = "", doc = "the parameter (bin width or features per bin) to pass to the VCF/BCF IndexCreator", required = false, exclusiveOf = "", validation = "")
-  var variant_index_parameter: Option[Int] = None
-
-  /** Set the minimum level of logging */
-  @Argument(fullName = "logging_level", shortName = "l", doc = "Set the minimum level of logging", required = false, exclusiveOf = "", validation = "")
-  var logging_level: String = _
-
-  /** Set the logging location */
-  @Output(fullName = "log_to_file", shortName = "log", doc = "Set the logging location", required = false, exclusiveOf = "", validation = "")
-  @Gather(classOf[org.broadinstitute.gatk.queue.function.scattergather.SimpleTextGatherFunction])
-  var log_to_file: File = _
-
-  override def cmdLine = super.cmdLine +
-    required("-R", reference, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-V", variant, spaceSeparated = true, escape = true, format = "%s") +
-    required("-out", outputFile, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(assumeSorted, "-assumeSorted", escape = true, format = "%s") +
-    optional("--variant_index_type", variant_index_type, spaceSeparated = true, escape = true, format = "%s") +
-    optional("--variant_index_parameter", variant_index_parameter, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-l", logging_level, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-log", log_to_file, spaceSeparated = true, escape = true, format = "%s")
-}
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala
deleted file mode 100644
index 7873ba3e44a3be042a923024af37a36922bb46d4..0000000000000000000000000000000000000000
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/CombineVariants.scala
+++ /dev/null
@@ -1,128 +0,0 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
-
-import java.io.File
-
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
-import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
-import nl.lumc.sasc.biopet.utils.VcfUtils
-import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
-
-class CombineVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
-  def analysis_type = "CombineVariants"
-  scatterClass = classOf[LocusScatterFunction]
-  setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
-
-  /** VCF files to merge together */
-  @Input(fullName = "variant", shortName = "V", doc = "VCF files to merge together", required = true, exclusiveOf = "", validation = "")
-  var variant: Seq[File] = Nil
-
-  /** File to which variants should be written */
-  @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
-  @Gather(classOf[CatVariantsGatherer])
-  var out: File = _
-
-  /** Determines how we should merge genotype records for samples shared across the ROD files */
-  @Argument(fullName = "genotypemergeoption", shortName = "genotypeMergeOptions", doc = "Determines how we should merge genotype records for samples shared across the ROD files", required = false, exclusiveOf = "", validation = "")
-  var genotypemergeoption: Option[String] = config("genotypemergeoption")
-
-  /** Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields */
-  @Argument(fullName = "filteredrecordsmergetype", shortName = "filteredRecordsMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required = false, exclusiveOf = "", validation = "")
-  var filteredrecordsmergetype: Option[String] = config("filteredrecordsmergetype")
-
-  /** Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel) */
-  @Argument(fullName = "multipleallelesmergetype", shortName = "multipleAllelesMergeType", doc = "Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required = false, exclusiveOf = "", validation = "")
-  var multipleallelesmergetype: Option[String] = config("multipleallelesmergetype")
-
-  /** Ordered list specifying priority for merging */
-  @Argument(fullName = "rod_priority_list", shortName = "priority", doc = "Ordered list specifying priority for merging", required = false, exclusiveOf = "", validation = "")
-  var rod_priority_list: Option[String] = config("rod_priority_list")
-
-  /** Emit interesting sites requiring complex compatibility merging to file */
-  @Argument(fullName = "printComplexMerges", shortName = "printComplexMerges", doc = "Emit interesting sites requiring complex compatibility merging to file", required = false, exclusiveOf = "", validation = "")
-  var printComplexMerges: Boolean = config("printComplexMerges", default = false)
-
-  /** Treat filtered variants as uncalled */
-  @Argument(fullName = "filteredAreUncalled", shortName = "filteredAreUncalled", doc = "Treat filtered variants as uncalled", required = false, exclusiveOf = "", validation = "")
-  var filteredAreUncalled: Boolean = config("filteredAreUncalled", default = false)
-
-  /** Emit a sites-only file */
-  @Argument(fullName = "minimalVCF", shortName = "minimalVCF", doc = "Emit a sites-only file", required = false, exclusiveOf = "", validation = "")
-  var minimalVCF: Boolean = config("minimalVCF", default = false)
-
-  /** Exclude sites where no variation is present after merging */
-  @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Exclude sites where no variation is present after merging", required = false, exclusiveOf = "", validation = "")
-  var excludeNonVariants: Boolean = config("excludeNonVariants", default = false)
-
-  /** Key name for the set attribute */
-  @Argument(fullName = "setKey", shortName = "setKey", doc = "Key name for the set attribute", required = false, exclusiveOf = "", validation = "")
-  var setKey: Option[String] = config("set_key")
-
-  /** Assume input VCFs have identical sample sets and disjoint calls */
-  @Argument(fullName = "assumeIdenticalSamples", shortName = "assumeIdenticalSamples", doc = "Assume input VCFs have identical sample sets and disjoint calls", required = false, exclusiveOf = "", validation = "")
-  var assumeIdenticalSamples: Boolean = config("assumeIdenticalSamples", default = false)
-
-  /** Minimum number of input files the site must be observed in to be included */
-  @Argument(fullName = "minimumN", shortName = "minN", doc = "Minimum number of input files the site must be observed in to be included", required = false, exclusiveOf = "", validation = "")
-  var minimumN: Option[Int] = config("minimumN")
-
-  /** Do not output the command line to the header */
-  @Argument(fullName = "suppressCommandLineHeader", shortName = "suppressCommandLineHeader", doc = "Do not output the command line to the header", required = false, exclusiveOf = "", validation = "")
-  var suppressCommandLineHeader: Boolean = config("suppressCommandLineHeader", default = false)
-
-  /** Use the INFO content of the record with the highest AC */
-  @Argument(fullName = "mergeInfoWithMaxAC", shortName = "mergeInfoWithMaxAC", doc = "Use the INFO content of the record with the highest AC", required = false, exclusiveOf = "", validation = "")
-  var mergeInfoWithMaxAC: Boolean = config("mergeInfoWithMaxAC", default = false)
-
-  /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
-  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
-
-  /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
-  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
-
-  /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
-  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
-
-  @Output
-  @Gather(enabled = false)
-  private var outputIndex: File = _
-
-  override def beforeGraph() {
-    super.beforeGraph()
-    deps ++= variant.filter(orig => orig != null && (!orig.getName.endsWith(".list"))).map(orig => VcfUtils.getVcfIndexFile(orig))
-    if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out))
-      outputIndex = VcfUtils.getVcfIndexFile(out)
-  }
-
-  override def cmdLine = super.cmdLine +
-    repeat("-V", variant, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-o", out, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-genotypeMergeOptions", genotypemergeoption, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-filteredRecordsMergeType", filteredrecordsmergetype, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-multipleAllelesMergeType", multipleallelesmergetype, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-priority", rod_priority_list, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(printComplexMerges, "-printComplexMerges", escape = true, format = "%s") +
-    conditional(filteredAreUncalled, "-filteredAreUncalled", escape = true, format = "%s") +
-    conditional(minimalVCF, "-minimalVCF", escape = true, format = "%s") +
-    conditional(excludeNonVariants, "-env", escape = true, format = "%s") +
-    optional("-setKey", setKey, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(assumeIdenticalSamples, "-assumeIdenticalSamples", escape = true, format = "%s") +
-    optional("-minN", minimumN, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(suppressCommandLineHeader, "-suppressCommandLineHeader", escape = true, format = "%s") +
-    conditional(mergeInfoWithMaxAC, "-mergeInfoWithMaxAC", escape = true, format = "%s") +
-    conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") +
-    conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") +
-    conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
-}
-
-object CombineVariants {
-  def apply(root: Configurable, input: List[File], output: File): CombineVariants = {
-    val cv = new CombineVariants(root)
-    cv.variant = input
-    cv.out = output
-    cv
-  }
-}
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala
deleted file mode 100644
index b8c3e6ba238836cf5a1c6f71a534a0359275e8c8..0000000000000000000000000000000000000000
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala
+++ /dev/null
@@ -1,44 +0,0 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
-
-import nl.lumc.sasc.biopet.core._
-import org.broadinstitute.gatk.engine.phonehome.GATKRunReport
-
-/**
- * @deprecated
- */
-trait GatkGeneral extends org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK with CommandLineResources with Reference with Version {
-  var executable: String = config("java", default = "java", namespace = "java", freeVar = false)
-
-  override def subPath = "gatk" :: super.subPath
-
-  jarFile = config("gatk_jar")
-
-  reference_sequence = referenceFasta()
-
-  override def defaultCoreMemory = 4.0
-  override def faiRequired = true
-  override def dictRequired = true
-
-  if (config.contains("intervals")) intervals = config("intervals").asFileList
-  if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList
-
-  Option(config("et").value) match {
-    case Some("NO_ET")  => et = GATKRunReport.PhoneHomeOption.NO_ET
-    case Some("AWS")    => et = GATKRunReport.PhoneHomeOption.AWS
-    case Some("STDOUT") => et = GATKRunReport.PhoneHomeOption.STDOUT
-    case Some(x)        => throw new IllegalArgumentException(s"Unknown et option for gatk: $x")
-    case _              =>
-  }
-
-  if (config.contains("gatk_key")) gatk_key = config("gatk_key")
-  if (config.contains("pedigree")) pedigree = config("pedigree")
-
-  def versionRegex = """(.*)""".r
-  override def versionExitcode = List(0, 1)
-  def versionCommand = "java" + " -jar " + jarFile + " -version"
-
-  override def getVersion = {
-    BiopetCommandLineFunction.preProcessExecutable(executable).path.foreach(executable = _)
-    super.getVersion.collect { case v => "Gatk " + v }
-  }
-}
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala
deleted file mode 100644
index dd49dd75da5fb6d91f5cc0826338796799a198c3..0000000000000000000000000000000000000000
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/SelectVariants.scala
+++ /dev/null
@@ -1,262 +0,0 @@
-package nl.lumc.sasc.biopet.extensions.gatk.broad
-
-import java.io.File
-
-import nl.lumc.sasc.biopet.utils.config.Configurable
-import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
-import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
-import nl.lumc.sasc.biopet.utils.VcfUtils
-import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
-
-class SelectVariants(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
-  def analysis_type = "SelectVariants"
-  scatterClass = classOf[LocusScatterFunction]
-  setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
-
-  /** Input VCF file */
-  @Input(fullName = "variant", shortName = "V", doc = "Input VCF file", required = true, exclusiveOf = "", validation = "")
-  var variant: File = _
-
-  /** Output variants not called in this comparison track */
-  @Input(fullName = "discordance", shortName = "disc", doc = "Output variants not called in this comparison track", required = false, exclusiveOf = "", validation = "")
-  var discordance: Option[File] = None
-
-  /** Output variants also called in this comparison track */
-  @Input(fullName = "concordance", shortName = "conc", doc = "Output variants also called in this comparison track", required = false, exclusiveOf = "", validation = "")
-  var concordance: Option[File] = None
-
-  /** File to which variants should be written */
-  @Output(fullName = "out", shortName = "o", doc = "File to which variants should be written", required = false, exclusiveOf = "", validation = "")
-  @Gather(classOf[CatVariantsGatherer])
-  var out: File = _
-
-  /** Include genotypes from this sample */
-  @Argument(fullName = "sample_name", shortName = "sn", doc = "Include genotypes from this sample", required = false, exclusiveOf = "", validation = "")
-  var sample_name: List[String] = config("sample_name", default = Nil)
-
-  /** Regular expression to select multiple samples */
-  @Argument(fullName = "sample_expressions", shortName = "se", doc = "Regular expression to select multiple samples", required = false, exclusiveOf = "", validation = "")
-  var sample_expressions: List[String] = config("sample_expressions", default = Nil)
-
-  /** File containing a list of samples to include */
-  @Input(fullName = "sample_file", shortName = "sf", doc = "File containing a list of samples to include", required = false, exclusiveOf = "", validation = "")
-  var sample_file: List[File] = config("sample_file", default = Nil)
-
-  /** Exclude genotypes from this sample */
-  @Argument(fullName = "exclude_sample_name", shortName = "xl_sn", doc = "Exclude genotypes from this sample", required = false, exclusiveOf = "", validation = "")
-  var exclude_sample_name: List[String] = config("exclude_sample_name", default = Nil)
-
-  /** List of samples to exclude */
-  @Input(fullName = "exclude_sample_file", shortName = "xl_sf", doc = "List of samples to exclude", required = false, exclusiveOf = "", validation = "")
-  var exclude_sample_file: List[File] = config("exclude_sample_file", default = Nil)
-
-  /** List of sample expressions to exclude */
-  @Input(fullName = "exclude_sample_expressions", shortName = "xl_se", doc = "List of sample expressions to exclude", required = false, exclusiveOf = "", validation = "")
-  var exclude_sample_expressions: List[File] = config("exclude_sample_expressions", default = Nil)
-
-  /** One or more criteria to use when selecting the data */
-  @Argument(fullName = "selectexpressions", shortName = "select", doc = "One or more criteria to use when selecting the data", required = false, exclusiveOf = "", validation = "")
-  var selectexpressions: List[String] = config("selectexpressions", default = Nil)
-
-  /** Invert the selection criteria for -select */
-  @Argument(fullName = "invertselect", shortName = "invertSelect", doc = "Invert the selection criteria for -select", required = false, exclusiveOf = "", validation = "")
-  var invertselect: Boolean = config("invertselect", default = false)
-
-  /** Don't include non-variant sites */
-  @Argument(fullName = "excludeNonVariants", shortName = "env", doc = "Don't include non-variant sites", required = false, exclusiveOf = "", validation = "")
-  var excludeNonVariants: Boolean = config("excludeNonVariants", default = false)
-
-  /** Don't include filtered sites */
-  @Argument(fullName = "excludeFiltered", shortName = "ef", doc = "Don't include filtered sites", required = false, exclusiveOf = "", validation = "")
-  var excludeFiltered: Boolean = config("excludeFiltered", default = false)
-
-  /** Preserve original alleles, do not trim */
-  @Argument(fullName = "preserveAlleles", shortName = "noTrim", doc = "Preserve original alleles, do not trim", required = false, exclusiveOf = "", validation = "")
-  var preserveAlleles: Boolean = config("preserveAlleles", default = false)
-
-  /** Remove alternate alleles not present in any genotypes */
-  @Argument(fullName = "removeUnusedAlternates", shortName = "trimAlternates", doc = "Remove alternate alleles not present in any genotypes", required = false, exclusiveOf = "", validation = "")
-  var removeUnusedAlternates: Boolean = config("removeUnusedAlternates", default = false)
-
-  /** Select only variants of a particular allelicity */
-  @Argument(fullName = "restrictAllelesTo", shortName = "restrictAllelesTo", doc = "Select only variants of a particular allelicity", required = false, exclusiveOf = "", validation = "")
-  var restrictAllelesTo: Option[String] = config("restrictAllelesTo")
-
-  /** Store the original AC, AF, and AN values after subsetting */
-  @Argument(fullName = "keepOriginalAC", shortName = "keepOriginalAC", doc = "Store the original AC, AF, and AN values after subsetting", required = false, exclusiveOf = "", validation = "")
-  var keepOriginalAC: Boolean = config("keepOriginalAC", default = false)
-
-  /** Store the original DP value after subsetting */
-  @Argument(fullName = "keepOriginalDP", shortName = "keepOriginalDP", doc = "Store the original DP value after subsetting", required = false, exclusiveOf = "", validation = "")
-  var keepOriginalDP: Boolean = config("keepOriginalDP", default = false)
-
-  /** Output mendelian violation sites only */
-  @Argument(fullName = "mendelianViolation", shortName = "mv", doc = "Output mendelian violation sites only", required = false, exclusiveOf = "", validation = "")
-  var mendelianViolation: Boolean = config("mendelianViolation", default = false)
-
-  /** Output non-mendelian violation sites only */
-  @Argument(fullName = "invertMendelianViolation", shortName = "invMv", doc = "Output non-mendelian violation sites only", required = false, exclusiveOf = "", validation = "")
-  var invertMendelianViolation: Boolean = config("invertMendelianViolation", default = false)
-
-  /** Minimum GQ score for each trio member to accept a site as a violation */
-  @Argument(fullName = "mendelianViolationQualThreshold", shortName = "mvq", doc = "Minimum GQ score for each trio member to accept a site as a violation", required = false, exclusiveOf = "", validation = "")
-  var mendelianViolationQualThreshold: Option[Double] = config("mendelianViolationQualThreshold")
-
-  /** Format string for mendelianViolationQualThreshold */
-  @Argument(fullName = "mendelianViolationQualThresholdFormat", shortName = "", doc = "Format string for mendelianViolationQualThreshold", required = false, exclusiveOf = "", validation = "")
-  var mendelianViolationQualThresholdFormat: String = "%s"
-
-  /** Select a fraction of variants at random from the input */
-  @Argument(fullName = "select_random_fraction", shortName = "fraction", doc = "Select a fraction of variants at random from the input", required = false, exclusiveOf = "", validation = "")
-  var select_random_fraction: Option[Double] = config("select_random_fraction")
-
-  /** Format string for select_random_fraction */
-  @Argument(fullName = "select_random_fractionFormat", shortName = "", doc = "Format string for select_random_fraction", required = false, exclusiveOf = "", validation = "")
-  var select_random_fractionFormat: String = "%s"
-
-  /** Select a fraction of genotypes at random from the input and sets them to no-call */
-  @Argument(fullName = "remove_fraction_genotypes", shortName = "fractionGenotypes", doc = "Select a fraction of genotypes at random from the input and sets them to no-call", required = false, exclusiveOf = "", validation = "")
-  var remove_fraction_genotypes: Option[Double] = config("remove_fraction_genotypes")
-
-  /** Format string for remove_fraction_genotypes */
-  @Argument(fullName = "remove_fraction_genotypesFormat", shortName = "", doc = "Format string for remove_fraction_genotypes", required = false, exclusiveOf = "", validation = "")
-  var remove_fraction_genotypesFormat: String = "%s"
-
-  /** Select only a certain type of variants from the input file */
-  @Argument(fullName = "selectTypeToInclude", shortName = "selectType", doc = "Select only a certain type of variants from the input file", required = false, exclusiveOf = "", validation = "")
-  var selectTypeToInclude: List[String] = config("selectTypeToInclude", default = Nil)
-
-  /** Do not select certain type of variants from the input file */
-  @Argument(fullName = "selectTypeToExclude", shortName = "xlSelectType", doc = "Do not select certain type of variants from the input file", required = false, exclusiveOf = "", validation = "")
-  var selectTypeToExclude: Seq[String] = config("selectTypeToExclude", default = Nil)
-
-  /** List of variant IDs to select */
-  @Input(fullName = "keepIDs", shortName = "IDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "")
-  var keepIDs: Option[File] = config("keepIDs")
-
-  /** List of variant IDs to select */
-  @Argument(fullName = "excludeIDs", shortName = "xlIDs", doc = "List of variant IDs to select", required = false, exclusiveOf = "", validation = "")
-  var excludeIDs: Option[File] = config("excludeIDs")
-
-  /** If true, the incoming VariantContext will be fully decoded */
-  @Argument(fullName = "fullyDecode", shortName = "", doc = "If true, the incoming VariantContext will be fully decoded", required = false, exclusiveOf = "", validation = "")
-  var fullyDecode: Boolean = config("fullyDecode", default = false)
-
-  /** If true, we won't actually write the output file.  For efficiency testing only */
-  @Argument(fullName = "justRead", shortName = "", doc = "If true, we won't actually write the output file.  For efficiency testing only", required = false, exclusiveOf = "", validation = "")
-  var justRead: Boolean = config("justRead", default = false)
-
-  /** Maximum size of indels to include */
-  @Argument(fullName = "maxIndelSize", shortName = "", doc = "Maximum size of indels to include", required = false, exclusiveOf = "", validation = "")
-  var maxIndelSize: Option[Int] = config("maxIndelSize")
-
-  /** Minimum size of indels to include */
-  @Argument(fullName = "minIndelSize", shortName = "", doc = "Minimum size of indels to include", required = false, exclusiveOf = "", validation = "")
-  var minIndelSize: Option[Int] = config("minIndelSize")
-
-  /** Maximum number of samples filtered at the genotype level */
-  @Argument(fullName = "maxFilteredGenotypes", shortName = "", doc = "Maximum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
-  var maxFilteredGenotypes: Option[Int] = config("maxFilteredGenotypes")
-
-  /** Minimum number of samples filtered at the genotype level */
-  @Argument(fullName = "minFilteredGenotypes", shortName = "", doc = "Minimum number of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
-  var minFilteredGenotypes: Option[Int] = config("minFilteredGenotypes")
-
-  /** Maximum fraction of samples filtered at the genotype level */
-  @Argument(fullName = "maxFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
-  var maxFractionFilteredGenotypes: Option[Double] = config("maxFractionFilteredGenotypes")
-
-  /** Format string for maxFractionFilteredGenotypes */
-  @Argument(fullName = "maxFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for maxFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "")
-  var maxFractionFilteredGenotypesFormat: String = "%s"
-
-  /** Maximum fraction of samples filtered at the genotype level */
-  @Argument(fullName = "minFractionFilteredGenotypes", shortName = "", doc = "Maximum fraction of samples filtered at the genotype level", required = false, exclusiveOf = "", validation = "")
-  var minFractionFilteredGenotypes: Option[Double] = config("minFractionFilteredGenotypes")
-
-  /** Format string for minFractionFilteredGenotypes */
-  @Argument(fullName = "minFractionFilteredGenotypesFormat", shortName = "", doc = "Format string for minFractionFilteredGenotypes", required = false, exclusiveOf = "", validation = "")
-  var minFractionFilteredGenotypesFormat: String = "%s"
-
-  /** Set filtered genotypes to no-call */
-  @Argument(fullName = "setFilteredGtToNocall", shortName = "", doc = "Set filtered genotypes to no-call", required = false, exclusiveOf = "", validation = "")
-  var setFilteredGtToNocall: Boolean = config("setFilteredGtToNocall", default = false)
-
-  /** Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored. */
-  @Argument(fullName = "ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", shortName = "", doc = "Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.", required = false, exclusiveOf = "", validation = "")
-  var ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES: Boolean = config("ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", default = false)
-
-  /** Forces output VCF to be compliant to up-to-date version */
-  @Argument(fullName = "forceValidOutput", shortName = "", doc = "Forces output VCF to be compliant to up-to-date version", required = false, exclusiveOf = "", validation = "")
-  var forceValidOutput: Boolean = config("forceValidOutput", default = false)
-
-  /** Filter out reads with CIGAR containing the N operator, instead of failing with an error */
-  @Argument(fullName = "filter_reads_with_N_cigar", shortName = "filterRNC", doc = "Filter out reads with CIGAR containing the N operator, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_reads_with_N_cigar: Boolean = config("filter_reads_with_N_cigar", default = false)
-
-  /** Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error */
-  @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "Filter out reads with mismatching numbers of bases and base qualities, instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_mismatching_base_and_quals: Boolean = config("filter_mismatching_base_and_quals", default = false)
-
-  /** Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error */
-  @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "Filter out reads with no stored bases (i.e. '*' where the sequence should be), instead of failing with an error", required = false, exclusiveOf = "", validation = "")
-  var filter_bases_not_stored: Boolean = config("filter_bases_not_stored", default = false)
-
-  @Output
-  @Gather(enabled = false)
-  private var outputIndex: File = _
-
-  override def beforeGraph() {
-    super.beforeGraph()
-    if (variant != null)
-      deps :+= VcfUtils.getVcfIndexFile(variant)
-    discordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_))
-    concordance.foreach(deps :+= VcfUtils.getVcfIndexFile(_))
-    if (out != null && !org.broadinstitute.gatk.utils.io.IOUtils.isSpecialFile(out))
-      outputIndex = VcfUtils.getVcfIndexFile(out)
-  }
-
-  override def cmdLine = super.cmdLine +
-    required(TaggedFile.formatCommandLineParameter("-V", variant), variant, spaceSeparated = true, escape = true, format = "%s") +
-    optional(TaggedFile.formatCommandLineParameter("-disc", discordance), discordance, spaceSeparated = true, escape = true, format = "%s") +
-    optional(TaggedFile.formatCommandLineParameter("-conc", concordance), concordance, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-o", out, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-sn", sample_name, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-se", sample_expressions, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-sf", sample_file, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-xl_sn", exclude_sample_name, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-xl_sf", exclude_sample_file, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-xl_se", exclude_sample_expressions, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-select", selectexpressions, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(invertselect, "-invertSelect", escape = true, format = "%s") +
-    conditional(excludeNonVariants, "-env", escape = true, format = "%s") +
-    conditional(excludeFiltered, "-ef", escape = true, format = "%s") +
-    conditional(preserveAlleles, "-noTrim", escape = true, format = "%s") +
-    conditional(removeUnusedAlternates, "-trimAlternates", escape = true, format = "%s") +
-    optional("-restrictAllelesTo", restrictAllelesTo, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(keepOriginalAC, "-keepOriginalAC", escape = true, format = "%s") +
-    conditional(keepOriginalDP, "-keepOriginalDP", escape = true, format = "%s") +
-    conditional(mendelianViolation, "-mv", escape = true, format = "%s") +
-    conditional(invertMendelianViolation, "-invMv", escape = true, format = "%s") +
-    optional("-mvq", mendelianViolationQualThreshold, spaceSeparated = true, escape = true, format = mendelianViolationQualThresholdFormat) +
-    optional("-fraction", select_random_fraction, spaceSeparated = true, escape = true, format = select_random_fractionFormat) +
-    optional("-fractionGenotypes", remove_fraction_genotypes, spaceSeparated = true, escape = true, format = remove_fraction_genotypesFormat) +
-    repeat("-selectType", selectTypeToInclude, spaceSeparated = true, escape = true, format = "%s") +
-    repeat("-xlSelectType", selectTypeToExclude, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-IDs", keepIDs, spaceSeparated = true, escape = true, format = "%s") +
-    optional("-xlIDs", excludeIDs, spaceSeparated = true, escape = true, format = "%s") +
-    conditional(fullyDecode, "--fullyDecode", escape = true, format = "%s") +
-    conditional(justRead, "--justRead", escape = true, format = "%s") +
-    optional("--maxIndelSize", maxIndelSize, spaceSeparated = true, escape = true, format = "%s") +
-    optional("--minIndelSize", minIndelSize, spaceSeparated = true, escape = true, format = "%s") +
-    optional("--maxFilteredGenotypes", maxFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") +
-    optional("--minFilteredGenotypes", minFilteredGenotypes, spaceSeparated = true, escape = true, format = "%s") +
-    optional("--maxFractionFilteredGenotypes", maxFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = maxFractionFilteredGenotypesFormat) +
-    optional("--minFractionFilteredGenotypes", minFractionFilteredGenotypes, spaceSeparated = true, escape = true, format = minFractionFilteredGenotypesFormat) +
-    conditional(setFilteredGtToNocall, "--setFilteredGtToNocall", escape = true, format = "%s") +
-    conditional(ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES, "--ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", escape = true, format = "%s") +
-    conditional(forceValidOutput, "--forceValidOutput", escape = true, format = "%s") +
-    conditional(filter_reads_with_N_cigar, "-filterRNC", escape = true, format = "%s") +
-    conditional(filter_mismatching_base_and_quals, "-filterMBQ", escape = true, format = "%s") +
-    conditional(filter_bases_not_stored, "-filterNoBases", escape = true, format = "%s")
-}
diff --git a/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala b/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala
index cc98b0c7ad39f729bae0af7ed8ff90604f82e5b3..5444ade3b60a58c4e94a312e6c4d24a156c42c84 100644
--- a/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala
+++ b/biopet-tools-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilterTest.scala
@@ -19,7 +19,7 @@ import java.io.File
 
 import org.scalatest.Matchers
 import org.scalatest.testng.TestNGSuite
-import org.testng.annotations.{ DataProvider, Test }
+import org.testng.annotations.Test
 
 /**
  * Created by ahbbollen on 2-3-16.
@@ -44,34 +44,26 @@ class VcfFilterTest extends TestNGSuite with Matchers {
     filterer.outputVcfIndex.getAbsolutePath shouldBe oVcf.getAbsolutePath + ".tbi"
   }
 
-  @DataProvider(name = "functions")
-  def functions = {
-    Array(
-      () => testCommand(minSampleDepth = Some(2)),
-      () => testCommand(minTotalDepth = Some(2)),
-      () => testCommand(minAlternateDepth = Some(2)),
-      () => testCommand(minSamplesPass = Some(2)),
-      () => testCommand(minGenomeQuality = Some(50)),
-      () => testCommand(filterRefCalls = true),
-      () => testCommand(invertedOutputVcf = Some(File.createTempFile("vcfFilter", ".vcf"))),
-      () => testCommand(resToDom = Some("dummy")),
-      () => testCommand(trioCompound = Some("dummy")),
-      () => testCommand(deNovoInSample = Some("dummy")),
-      () => testCommand(deNovoTrio = Some("dummy")),
-      () => testCommand(trioLossOfHet = Some("dummy")),
-      () => testCommand(mustHaveVariant = List("sample1", "sample2")),
-      () => testCommand(calledIn = List("sample1", "sample2")),
-      () => testCommand(mustHaveGenotype = List("sample1:HET", "sample2:HET")),
-      () => testCommand(diffGenotype = List("sample1:sample2", "sample2:sample3")),
-      () => testCommand(minQualScore = Some(50.0)),
-      () => testCommand(filterHetVarToHomVar = List("dummy")),
-      () => testCommand(id = List("rs01", "rs02")),
-      () => testCommand(idFile = Some(File.createTempFile("vcfFilter", ".txt")))
-    ).map(Array(_))
-  }
-
-  @Test(dataProvider = "functions")
-  def executer(function0: Function0[Unit]): Unit = function0()
+  @Test def testMinSampleDepth() = testCommand(minSampleDepth = Some(2))
+  @Test def testMinTotalDepth() = testCommand(minTotalDepth = Some(2))
+  @Test def testMinAlternateDepth() = testCommand(minAlternateDepth = Some(2))
+  @Test def testMinSamplesPass() = testCommand(minSamplesPass = Some(2))
+  @Test def testMinGenomeQuality() = testCommand(minGenomeQuality = Some(50))
+  @Test def testFilterRefCalls() = testCommand(filterRefCalls = true)
+  @Test def testInvertedOutputVcf() = testCommand(invertedOutputVcf = Some(File.createTempFile("vcfFilter", ".vcf")))
+  @Test def testResToDom() = testCommand(resToDom = Some("dummy"))
+  @Test def testTrioCompound() = testCommand(trioCompound = Some("dummy"))
+  @Test def testDeNovoInSample() = testCommand(deNovoInSample = Some("dummy"))
+  @Test def testDeNovoTrio() = testCommand(deNovoTrio = Some("dummy"))
+  @Test def testTrioLossOfHet() = testCommand(trioLossOfHet = Some("dummy"))
+  @Test def testMustHaveVariant() = testCommand(mustHaveVariant = List("sample1", "sample2"))
+  @Test def testCalledIn() = testCommand(calledIn = List("sample1", "sample2"))
+  @Test def testMustHaveGenotype() = testCommand(mustHaveGenotype = List("sample1:HET", "sample2:HET"))
+  @Test def testDiffGenotype() = testCommand(diffGenotype = List("sample1:sample2", "sample2:sample3"))
+  @Test def testMinQualScore() = testCommand(minQualScore = Some(50.0))
+  @Test def testFilterHetVarToHomVar() = testCommand(filterHetVarToHomVar = List("dummy"))
+  @Test def testId() = testCommand(id = List("rs01", "rs02"))
+  @Test def testIdFile() = testCommand(idFile = Some(File.createTempFile("vcfFilter", ".txt")))
 
   protected def testCommand(minSampleDepth: Option[Int] = None,
                             minTotalDepth: Option[Int] = None,
diff --git a/docs/pipelines/toucan.md b/docs/pipelines/toucan.md
index 12f4ea108c54402f51d37e6f673d97e633058d6f..5f362bf13fa083d571f57e03f95351fc1231a16b 100644
--- a/docs/pipelines/toucan.md
+++ b/docs/pipelines/toucan.md
@@ -83,6 +83,25 @@ The following config values are optional:
 Annotation queries can be set by the `annotation_queries` config value in the `manwe` config namespace. 
 By default, a global query is returned. 
 
+
+### Groups
+In case you want to add your samples to a specific group in your varda database, you can use the tagging system in your sample config.
+Specifically, the `varda_group` tag should be a list of strings pointing to groups.
+
+E.g.:
+
+```json
+{
+    "samples": {
+        "sample1": {
+            "tags": {
+                "varda_group": ["group1", "group2"]
+            }
+        }
+    }
+}
+```
+
 Running the pipeline
 ---------------
 The command to run the pipeline is:
diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
index fc8db7ab30f7581c7638f15c48bba6e9443eb195..3cb06df0e160cb97b98710de74f7ca9fa31ce919 100644
--- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
+++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
@@ -16,6 +16,7 @@
 package nl.lumc.sasc.biopet.pipelines.flexiprep
 
 import nl.lumc.sasc.biopet.utils.config.Configurable
+import scala.collection.JavaConversions._
 
 /**
  * Cutadapt wrapper specific for Flexiprep.
@@ -41,23 +42,26 @@ class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.e
     val adapterCounts: Map[String, Any] = initStats.get(adaptersStatsName) match {
       // "adapters" key found in statistics
       case Some(m: Map[_, _]) => m.flatMap {
-        case (seq: String, count) =>
-          seqToNameMap.get(seq) match {
+        case (adapterSequence: String, adapterStats: Map[_, _]) =>
+          seqToNameMap.get(adapterSequence) match {
             // adapter sequence is found by FastQC
-            case Some(n) => Some(n -> Map("sequence" -> seq, "count" -> count))
+            case Some(adapterSeqName) => {
+              Some(adapterSeqName ->
+                Map("sequence" -> adapterSequence, "stats" -> adapterStats.toMap)
+              )
+            }
             // adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter
             // sequences come from FastQC
             case _ =>
-              throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastqInput'.")
+              throw new IllegalStateException(s"Adapter '$adapterSequence' is clipped but not found by FastQC in '$fastqInput'.")
           }
         // FastQC found no adapters
         case otherwise =>
-          ;
           logger.debug(s"No adapters found for summarizing in '$fastqInput'.")
           None
       }
       // "adapters" key not found ~ something went wrong in our part
-      case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastqInput'.")
+      case _ => throw new RuntimeException(s"Required key '${adaptersStatsName}' not found in stats entry '${fastqInput}'.")
     }
     initStats.updated(adaptersStatsName, adapterCounts)
   }
diff --git a/flexiprep/src/test/resources/ct-test.R1.clip.stats b/flexiprep/src/test/resources/ct-test.R1.clip.stats
new file mode 100644
index 0000000000000000000000000000000000000000..4a280ef0a7d2588169c02b5e40432f4f903c69b8
--- /dev/null
+++ b/flexiprep/src/test/resources/ct-test.R1.clip.stats
@@ -0,0 +1,160 @@
+This is cutadapt 1.9.1 with Python 2.7.6
+Command line parameters: -b CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC -b GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG -b CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC -b GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG --error-rate 0.2 --times 2 -m 15 ct_r1.fq.gz.seqtk.fq --output ct_r1.fq.gz.cutadapt.fq
+Trimming 4 adapters with at most 20.0% errors in single-end mode ...
+Finished in 0.19 s (189 us/read; 0.32 M reads/minute).
+
+=== Summary ===
+
+Total reads processed:                   1,000
+Reads with adapters:                       440 (44.0%)
+Reads that were too short:                  15 (1.5%)
+Reads written (passing filters):           985 (98.5%)
+
+Total basepairs processed:       100,000 bp
+Total written (filtered):         89,423 bp (89.4%)
+
+=== Adapter 1 ===
+
+Sequence: CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC; Type: variable 5'/3'; Length: 63; Trimmed: 94 times.
+18 times, it overlapped the 5' end of a read
+76 times, it overlapped the 3' end or was within the read
+
+No. of allowed errors:
+0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12
+
+Overview of removed sequences (5')
+length	count	expect	max.err	error counts
+3	8	15.6	0	8
+4	3	3.9	0	2 1
+5	2	1.0	1	0 2
+6	4	0.2	1	1 3
+9	1	0.0	1	0 0 1
+
+
+Overview of removed sequences (3' or within)
+length	count	expect	max.err	error counts
+3	13	15.6	0	13
+4	19	3.9	0	3 16
+5	21	1.0	1	0 21
+6	18	0.2	1	1 17
+7	2	0.1	1	0 2
+9	1	0.0	1	0 0 1
+11	1	0.0	2	0 0 1
+12	1	0.0	2	0 0 1
+
+=== Adapter 2 ===
+
+Sequence: GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG; Type: variable 5'/3'; Length: 63; Trimmed: 340 times.
+117 times, it overlapped the 5' end of a read
+223 times, it overlapped the 3' end or was within the read
+
+No. of allowed errors:
+0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12
+
+Overview of removed sequences (5')
+length	count	expect	max.err	error counts
+3	14	15.6	0	14
+4	29	3.9	0	6 23
+5	32	1.0	1	3 29
+6	36	0.2	1	0 36
+8	1	0.0	1	0 1
+9	1	0.0	1	0 0 1
+10	1	0.0	2	0 0 1
+11	2	0.0	2	0 0 2
+37	1	0.0	7	0 0 0 0 0 1
+
+
+Overview of removed sequences (3' or within)
+length	count	expect	max.err	error counts
+3	18	15.6	0	18
+4	9	3.9	0	5 4
+5	15	1.0	1	8 7
+6	10	0.2	1	8 2
+7	7	0.1	1	5 2
+8	10	0.0	1	9 1
+9	6	0.0	1	5 1
+10	8	0.0	2	5 0 3
+11	4	0.0	2	4
+12	4	0.0	2	4
+13	9	0.0	2	9
+14	4	0.0	2	3 0 1
+15	7	0.0	3	7
+16	2	0.0	3	2
+17	4	0.0	3	2 1 0 1
+18	2	0.0	3	2
+19	2	0.0	3	2
+20	2	0.0	4	0 1 1
+21	7	0.0	4	6 1
+22	7	0.0	4	7
+23	2	0.0	4	2
+24	3	0.0	4	3
+25	5	0.0	5	5
+26	5	0.0	5	5
+27	8	0.0	5	8
+28	6	0.0	5	5 1
+29	2	0.0	5	2
+30	5	0.0	6	5
+31	3	0.0	6	3
+32	8	0.0	6	8
+33	1	0.0	6	1
+34	5	0.0	6	0 5
+35	2	0.0	7	0 0 0 0 0 0 2
+36	3	0.0	7	0 0 0 0 0 0 3
+37	4	0.0	7	0 0 0 0 0 0 0 2 2
+38	2	0.0	7	0 0 0 0 0 0 0 0 0 2
+39	4	0.0	7	0 0 0 0 1 0 0 0 0 3
+40	3	0.0	8	0 0 0 0 0 0 0 3
+41	1	0.0	8	0 0 0 0 0 0 0 1
+42	4	0.0	8	0 0 0 0 0 0 0 0 4
+43	5	0.0	8	0 0 0 0 0 0 0 0 0 5
+44	3	0.0	8	0 0 0 0 0 0 0 0 0 0 3
+46	1	0.0	9	0 0 0 0 0 0 0 0 0 0 1
+49	1	0.0	9	0 0 0 0 0 1
+
+=== Adapter 3 ===
+
+Sequence: CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC; Type: variable 5'/3'; Length: 63; Trimmed: 0 times.
+
+=== Adapter 4 ===
+
+Sequence: GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG; Type: variable 5'/3'; Length: 63; Trimmed: 82 times.
+15 times, it overlapped the 5' end of a read
+67 times, it overlapped the 3' end or was within the read
+
+No. of allowed errors:
+0-4 bp: 0; 5-9 bp: 1; 10-14 bp: 2; 15-19 bp: 3; 20-24 bp: 4; 25-29 bp: 5; 30-34 bp: 6; 35-39 bp: 7; 40-44 bp: 8; 45-49 bp: 9; 50-54 bp: 10; 55-59 bp: 11; 60-63 bp: 12
+
+Overview of removed sequences (5')
+length	count	expect	max.err	error counts
+26	1	0.0	5	0 1
+61	2	0.0	12	0 0 0 2
+64	11	0.0	12	0 0 0 11
+72	1	0.0	12	0 0 0 0 0 0 0 0 0 0 0 1
+
+
+Overview of removed sequences (3' or within)
+length	count	expect	max.err	error counts
+45	3	0.0	9	0 0 0 3
+46	2	0.0	9	0 0 0 2
+47	3	0.0	9	0 0 0 3
+48	3	0.0	9	0 0 0 3
+49	2	0.0	9	0 0 0 2
+50	3	0.0	10	0 0 0 3
+51	2	0.0	10	0 0 0 2
+52	6	0.0	10	0 0 0 6
+53	1	0.0	10	0 0 0 1
+54	5	0.0	10	0 0 0 4 0 1
+56	2	0.0	11	0 0 0 2
+57	2	0.0	11	0 0 0 2
+58	2	0.0	11	0 0 0 2
+59	3	0.0	11	0 0 0 2 0 0 0 0 0 1
+61	1	0.0	12	0 0 0 0 0 1
+62	3	0.0	12	0 0 0 2 1
+63	1	0.0	12	0 0 0 0 1
+66	3	0.0	12	0 0 0 3
+67	3	0.0	12	0 0 0 3
+70	1	0.0	12	0 0 0 1
+72	1	0.0	12	0 0 0 1
+80	1	0.0	12	0 0 0 1
+99	14	0.0	12	0 0 0 14
+
diff --git a/flexiprep/src/test/resources/fqc_contaminants_v0112.txt b/flexiprep/src/test/resources/fqc_contaminants_v0112.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d2c29bee8171e0454994c6d7d6f0f4780efb3921
--- /dev/null
+++ b/flexiprep/src/test/resources/fqc_contaminants_v0112.txt
@@ -0,0 +1,182 @@
+# This file contains a list of potential contaminants which are
+# frequently found in high throughput sequencing reactions.  These
+# are mostly sequences of adapters / primers used in the various
+# sequencing chemistries.
+# 
+# Please DO NOT rely on these sequences to design your own oligos, some
+# of them are truncated at ambiguous positions, and none of them are
+# definitive sequences from the manufacturers so don't blame us if you
+# try to use them and they don't work.
+#
+# You can add more sequences to the file by putting one line per entry
+# and specifying a name[tab]sequence.  If the contaminant you add is 
+# likely to be of use to others please consider sending it to the FastQ
+# authors, either via a bug report at www.bioinformatics.babraham.ac.uk/bugzilla/
+# or by directly emailing simon.andrews@babraham.ac.uk so other users of
+# the program can benefit.
+
+Illumina Single End Adapter 1					GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
+Illumina Single End Adapter 2					CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
+Illumina Single End PCR Primer 1				AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Single End PCR Primer 2				CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
+Illumina Single End Sequencing Primer			ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+
+Illumina Paired End Adapter 1					ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Paired End Adapter 2					GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
+Illumina Paried End PCR Primer 1				AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Paired End PCR Primer 2				CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
+Illumina Paried End Sequencing Primer 1			ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Paired End Sequencing Primer 2			CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
+
+Illumina DpnII expression Adapter 1				ACAGGTTCAGAGTTCTACAGTCCGAC
+Illumina DpnII expression Adapter 2				CAAGCAGAAGACGGCATACGA
+Illumina DpnII expression PCR Primer 1			CAAGCAGAAGACGGCATACGA
+Illumina DpnII expression PCR Primer 2			AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+Illumina DpnII expression Sequencing Primer		CGACAGGTTCAGAGTTCTACAGTCCGACGATC
+
+Illumina NlaIII expression Adapter 1			ACAGGTTCAGAGTTCTACAGTCCGACATG
+Illumina NlaIII expression Adapter 2			CAAGCAGAAGACGGCATACGA
+Illumina NlaIII expression PCR Primer 1			CAAGCAGAAGACGGCATACGA
+Illumina NlaIII expression PCR Primer 2			AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+Illumina NlaIII expression Sequencing Primer	CCGACAGGTTCAGAGTTCTACAGTCCGACATG
+
+Illumina Small RNA Adapter 1					GTTCAGAGTTCTACAGTCCGACGATC
+Illumina Small RNA Adapter 2					TGGAATTCTCGGGTGCCAAGG
+Illumina Small RNA RT Primer					CAAGCAGAAGACGGCATACGA
+Illumina Small RNA PCR Primer 1					CAAGCAGAAGACGGCATACGA
+Illumina Small RNA PCR Primer 2					AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+Illumina Small RNA Sequencing Primer			CGACAGGTTCAGAGTTCTACAGTCCGACGATC
+
+Illumina Multiplexing Adapter 1					GATCGGAAGAGCACACGTCT
+Illumina Multiplexing Adapter 2					ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Multiplexing PCR Primer 1.01			AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Multiplexing PCR Primer 2.01			GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
+Illumina Multiplexing Read1 Sequencing Primer	ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+Illumina Multiplexing Index Sequencing Primer	GATCGGAAGAGCACACGTCTGAACTCCAGTCAC
+Illumina Multiplexing Read2 Sequencing Primer	GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
+
+Illumina PCR Primer Index 1						CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC
+Illumina PCR Primer Index 2						CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC
+Illumina PCR Primer Index 3						CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC
+Illumina PCR Primer Index 4						CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC
+Illumina PCR Primer Index 5						CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC
+Illumina PCR Primer Index 6						CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC
+Illumina PCR Primer Index 7						CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTC
+Illumina PCR Primer Index 8						CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTC
+Illumina PCR Primer Index 9						CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTC
+Illumina PCR Primer Index 10					CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTC
+Illumina PCR Primer Index 11					CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTC
+Illumina PCR Primer Index 12					CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTC
+
+Illumina DpnII Gex Adapter 1					GATCGTCGGACTGTAGAACTCTGAAC
+Illumina DpnII Gex Adapter 1.01					ACAGGTTCAGAGTTCTACAGTCCGAC
+Illumina DpnII Gex Adapter 2					CAAGCAGAAGACGGCATACGA
+Illumina DpnII Gex Adapter 2.01					TCGTATGCCGTCTTCTGCTTG
+Illumina DpnII Gex PCR Primer 1					CAAGCAGAAGACGGCATACGA
+Illumina DpnII Gex PCR Primer 2					AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+Illumina DpnII Gex Sequencing Primer			CGACAGGTTCAGAGTTCTACAGTCCGACGATC
+
+Illumina NlaIII Gex Adapter 1.01				TCGGACTGTAGAACTCTGAAC
+Illumina NlaIII Gex Adapter 1.02				ACAGGTTCAGAGTTCTACAGTCCGACATG
+Illumina NlaIII Gex Adapter 2.01				CAAGCAGAAGACGGCATACGA
+Illumina NlaIII Gex Adapter 2.02				TCGTATGCCGTCTTCTGCTTG
+Illumina NlaIII Gex PCR Primer 1				CAAGCAGAAGACGGCATACGA
+Illumina NlaIII Gex PCR Primer 2				AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+Illumina NlaIII Gex Sequencing Primer			CCGACAGGTTCAGAGTTCTACAGTCCGACATG
+
+Illumina Small RNA RT Primer					CAAGCAGAAGACGGCATACGA
+Illumina 5p RNA Adapter							GTTCAGAGTTCTACAGTCCGACGATC
+Illumina RNA Adapter1							TGGAATTCTCGGGTGCCAAGG
+
+Illumina Small RNA 3p Adapter 1					ATCTCGTATGCCGTCTTCTGCTTG
+Illumina Small RNA PCR Primer 1					CAAGCAGAAGACGGCATACGA
+Illumina Small RNA PCR Primer 2					AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+Illumina Small RNA Sequencing Primer			CGACAGGTTCAGAGTTCTACAGTCCGACGATC
+
+TruSeq Universal Adapter						AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+TruSeq Adapter, Index 1							GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 2							GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 3							GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 4							GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 5							GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 6							GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 7							GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 8							GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 9							GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 10						GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 11						GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 12						GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 13						GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 14						GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 15						GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 16						GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 18						GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 19						GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 20						GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 21						GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 22						GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 23						GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCACTCTTCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 25						GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG
+TruSeq Adapter, Index 27						GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTCTCGTATGCCGTCTTCTGCTTG
+
+Illumina RNA RT Primer							GCCTTGGCACCCGAGAATTCCA
+Illumina RNA PCR Primer							AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGA
+
+RNA PCR Primer, Index 1							CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 2							CAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 3							CAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 4							CAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 5							CAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 6							CAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 7							CAAGCAGAAGACGGCATACGAGATGATCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 8							CAAGCAGAAGACGGCATACGAGATTCAAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 9							CAAGCAGAAGACGGCATACGAGATCTGATCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 10						CAAGCAGAAGACGGCATACGAGATAAGCTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 11						CAAGCAGAAGACGGCATACGAGATGTAGCCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 12						CAAGCAGAAGACGGCATACGAGATTACAAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 13						CAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 14						CAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 15						CAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 16						CAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 17						CAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 18						CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 19						CAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 20						CAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 21						CAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 22						CAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 23						CAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 24						CAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 25						CAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 26						CAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 27						CAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 28						CAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 29						CAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 30						CAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 31						CAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 32						CAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 33						CAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 34						CAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 35						CAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 36						CAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 37						CAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 38						CAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 39						CAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 40						CAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 41						CAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 42						CAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 43						CAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 44						CAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 45						CAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 46						CAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 47						CAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+RNA PCR Primer, Index 48						CAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA
+
+ABI Dynabead EcoP Oligo							CTGATCTAGAGGTACCGGATCCCAGCAGT
+ABI Solid3 Adapter A							CTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG
+ABI Solid3 Adapter B							CCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT
+ABI Solid3 5' AMP Primer						CCACTACGCCTCCGCTTTCCTCTCTATG
+ABI Solid3 3' AMP Primer						CTGCCCCGGGTTCCTCATTCT
+ABI Solid3 EF1 alpha Sense Primer				CATGTGTGTTGAGAGCTTC
+ABI Solid3 EF1 alpha Antisense Primer			GAAAACCAAAGTGGTCCAC
+ABI Solid3 GAPDH Forward Primer					TTAGCACCCCTGGCCAAGG
+ABI Solid3 GAPDH Reverse Primer					CTTACTCCTTGGAGGCCATG
diff --git a/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt b/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt
index 74938a52b7d505b1185b1962ffe7234ddb304a52..02b9e3f0cbf01c6ce54fa715df93d7cfc6ba4bab 100644
--- a/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt
+++ b/flexiprep/src/test/resources/v0101.fq_fastqc/fastqc_data.txt
@@ -1,29 +1,13 @@
-====
-    Biopet is built on top of GATK Queue for building bioinformatic
-    pipelines. It is mainly intended to support LUMC SHARK cluster which is running
-    SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
-    should also be able to execute Biopet tools and pipelines.
-
-    Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
-
-    Contact us at: sasc@lumc.nl
-
-    A dual licensing mode is applied. The source code within this project that are
-    not part of GATK Queue is freely available for non-commercial use under an AGPL
-    license; For commercial users or users who do not want to follow the AGPL
-    license, please contact us to obtain a separate license.
-====
-
-##FastQC	0.10.1
+##FastQC	0.11.2
 >>Basic Statistics	pass
-#Measure	Value	
-Filename	ct_r1.fq	
-File type	Conventional base calls	
-Encoding	Sanger / Illumina 1.9	
-Total Sequences	1000	
-Filtered Sequences	0	
-Sequence length	100	
-%GC	53	
+#Measure	Value
+Filename	ct_r1.fq.gz
+File type	Conventional base calls
+Encoding	Sanger / Illumina 1.9
+Total Sequences	1000
+Sequences flagged as poor quality	0
+Sequence length	100
+%GC	53
 >>END_MODULE
 >>Per base sequence quality	fail
 #Base	Mean	Median	Lower Quartile	Upper Quartile	10th Percentile	90th Percentile
@@ -36,26 +20,111 @@ Sequence length	100
 7	35.783	37.0	35.0	37.0	35.0	37.0
 8	36.008	37.0	35.0	37.0	35.0	37.0
 9	37.706	39.0	37.0	39.0	35.0	39.0
-10-14	37.857600000000005	39.2	37.2	39.4	34.8	39.4
-15-19	38.9788	40.2	38.0	41.0	35.0	41.0
-20-24	38.8246	40.0	38.0	41.0	34.8	41.0
-25-29	38.589600000000004	40.0	38.0	41.0	34.4	41.0
-30-34	38.3568	40.0	38.0	41.0	33.8	41.0
-35-39	38.1592	40.0	37.4	41.0	33.6	41.0
-40-44	37.4808	39.8	36.0	41.0	32.6	41.0
-45-49	36.9478	39.0	35.0	40.8	31.2	41.0
-50-54	35.845600000000005	37.8	34.6	40.0	29.4	41.0
-55-59	34.739	36.6	33.6	40.0	27.4	41.0
-60-64	34.1336	35.4	33.4	38.6	27.2	40.2
-65-69	32.7464	35.0	32.6	37.2	24.6	39.6
-70-74	29.3478	34.0	29.6	35.6	2.0	38.6
-75-79	27.4908	33.2	26.4	35.0	2.0	36.6
-80-84	25.893000000000008	33.0	21.8	35.0	2.0	35.4
-85-89	25.031799999999997	32.4	16.2	34.6	2.0	35.0
-90-94	23.9446	31.4	6.4	34.0	2.0	35.0
-95-99	22.9358	30.4	2.0	34.0	2.0	35.0
+10-11	37.709	39.0	37.0	39.0	35.0	39.0
+12-13	37.6135	39.0	37.0	39.0	35.0	39.0
+14-15	38.793	40.0	38.0	41.0	34.5	41.0
+16-17	39.033500000000004	40.5	38.0	41.0	35.0	41.0
+18-19	38.942	40.0	38.0	41.0	35.0	41.0
+20-21	38.888	40.0	38.0	41.0	35.0	41.0
+22-23	38.807	40.0	38.0	41.0	35.0	41.0
+24-25	38.702	40.0	38.0	41.0	34.0	41.0
+26-27	38.65	40.0	38.0	41.0	34.5	41.0
+28-29	38.4885	40.0	38.0	41.0	34.5	41.0
+30-31	38.307	40.0	38.0	41.0	34.0	41.0
+32-33	38.433499999999995	40.0	38.0	41.0	34.0	41.0
+34-35	38.3425	40.0	38.0	41.0	33.5	41.0
+36-37	38.1185	40.0	37.5	41.0	33.5	41.0
+38-39	38.088499999999996	40.0	37.0	41.0	33.5	41.0
+40-41	37.555	40.0	36.0	41.0	32.5	41.0
+42-43	37.504999999999995	40.0	36.0	41.0	33.0	41.0
+44-45	37.167	39.0	35.5	41.0	32.0	41.0
+46-47	36.980999999999995	39.0	35.0	41.0	31.0	41.0
+48-49	36.8635	39.0	35.0	40.5	31.0	41.0
+50-51	36.4125	38.5	35.0	40.0	30.5	41.0
+52-53	35.528000000000006	37.5	34.5	40.0	28.5	41.0
+54-55	34.925	37.0	33.5	40.0	27.5	41.0
+56-57	34.8735	37.0	34.0	40.0	27.5	41.0
+58-59	34.7225	36.0	33.5	40.0	28.0	41.0
+60-61	34.67400000000001	36.0	34.0	39.0	28.5	40.5
+62-63	33.841499999999996	35.0	33.0	38.5	26.5	40.0
+64-65	33.549	35.0	33.0	38.0	26.0	40.0
+66-67	32.971999999999994	35.0	33.0	37.0	26.0	40.0
+68-69	32.1635	35.0	32.0	37.0	22.5	39.0
+70-71	30.002000000000002	34.0	30.5	36.0	2.0	39.0
+72-73	29.0695	34.0	29.0	35.5	2.0	38.5
+74-75	28.641	34.0	29.0	35.0	2.0	38.0
+76-77	27.8495	33.0	27.5	35.0	2.0	36.0
+78-79	26.5345	33.0	24.0	35.0	2.0	36.5
+80-81	26.140500000000003	33.0	23.0	35.0	2.0	36.0
+82-83	25.784	33.0	21.5	35.0	2.0	35.0
+84-85	25.6115	33.0	20.0	35.0	2.0	35.0
+86-87	25.1755	33.0	17.0	35.0	2.0	35.0
+88-89	24.600499999999997	31.5	13.5	34.0	2.0	35.0
+90-91	24.088	31.5	6.5	34.0	2.0	35.0
+92-93	24.16	32.0	8.5	34.0	2.0	35.0
+94-95	23.02	30.0	2.0	34.0	2.0	35.0
+96-97	23.183	30.5	2.0	34.0	2.0	35.0
+98-99	22.75	30.5	2.0	34.0	2.0	35.0
 100	21.984	30.0	2.0	34.0	2.0	35.0
 >>END_MODULE
+>>Per tile sequence quality	pass
+#Tile	Base	Mean
+1101	1	0.0
+1101	2	0.0
+1101	3	0.0
+1101	4	0.0
+1101	5	0.0
+1101	6	0.0
+1101	7	0.0
+1101	8	0.0
+1101	9	0.0
+1101	10-11	0.0
+1101	12-13	0.0
+1101	14-15	0.0
+1101	16-17	0.0
+1101	18-19	0.0
+1101	20-21	0.0
+1101	22-23	0.0
+1101	24-25	0.0
+1101	26-27	0.0
+1101	28-29	0.0
+1101	30-31	0.0
+1101	32-33	0.0
+1101	34-35	0.0
+1101	36-37	0.0
+1101	38-39	0.0
+1101	40-41	0.0
+1101	42-43	0.0
+1101	44-45	0.0
+1101	46-47	0.0
+1101	48-49	0.0
+1101	50-51	0.0
+1101	52-53	0.0
+1101	54-55	0.0
+1101	56-57	0.0
+1101	58-59	0.0
+1101	60-61	0.0
+1101	62-63	0.0
+1101	64-65	0.0
+1101	66-67	0.0
+1101	68-69	0.0
+1101	70-71	0.0
+1101	72-73	0.0
+1101	74-75	0.0
+1101	76-77	0.0
+1101	78-79	0.0
+1101	80-81	0.0
+1101	82-83	0.0
+1101	84-85	0.0
+1101	86-87	0.0
+1101	88-89	0.0
+1101	90-91	0.0
+1101	92-93	0.0
+1101	94-95	0.0
+1101	96-97	0.0
+1101	98-99	0.0
+1101	100	0.0
+>>END_MODULE
 >>Per sequence quality scores	pass
 #Quality	Count
 11	1.0
@@ -99,57 +168,53 @@ Sequence length	100
 7	20.9	24.7	32.6	21.8
 8	20.0	27.200000000000003	30.0	22.8
 9	24.5	21.5	27.800000000000004	26.200000000000003
-10-14	25.22	23.28	26.26	25.240000000000002
-15-19	26.44	21.34	26.1	26.119999999999997
-20-24	25.240000000000002	22.1	24.6	28.060000000000002
-25-29	24.62	22.06	25.119999999999997	28.199999999999996
-30-34	26.240000000000002	21.44	24.279999999999998	28.04
-35-39	24.8	22.439999999999998	24.34	28.42
-40-44	25.8	22.84	23.9	27.46
-45-49	26.26	22.64	23.66	27.439999999999998
-50-54	26.72	22.58	23.18	27.52
-55-59	25.019999999999996	22.58	24.38	28.02
-60-64	26.251501802162597	22.00640768922707	23.28794553464157	28.454144973968766
-65-69	25.683829444891394	23.873692679002414	23.049074818986323	27.39340305711987
-70-74	25.554134697357206	25.44757033248082	21.717817561807333	27.28047740835465
-75-79	25.818501428257523	23.643155350472423	23.071852340145025	27.466490881125026
-80-84	26.973532796317606	23.95857307249712	21.74913693901036	27.318757192174914
-85-89	25.452016689847014	24.849327770050998	22.624014835419565	27.07464070468243
-90-94	24.547101449275363	22.35054347826087	24.139492753623188	28.962862318840582
-95-99	25.318837549655026	24.231653773782146	23.186284758519758	27.263223918043067
+10-11	25.15	24.0	27.55	23.3
+12-13	26.200000000000003	22.3	24.65	26.85
+14-15	24.75	21.95	26.3	27.0
+16-17	25.4	21.7	26.55	26.35
+18-19	27.650000000000002	21.6	25.85	24.9
+20-21	24.8	21.8	24.3	29.099999999999998
+22-23	25.900000000000002	23.05	24.15	26.900000000000002
+24-25	24.85	21.4	25.900000000000002	27.85
+26-27	24.7	20.849999999999998	25.0	29.45
+28-29	24.4	23.3	24.95	27.35
+30-31	27.35	20.95	25.15	26.55
+32-33	24.9	22.05	23.400000000000002	29.65
+34-35	25.6	22.15	25.900000000000002	26.35
+36-37	24.95	21.2	23.400000000000002	30.45
+38-39	24.8	23.35	23.7	28.15
+40-41	27.0	23.35	23.599999999999998	26.05
+42-43	25.15	22.35	23.799999999999997	28.7
+44-45	26.200000000000003	20.7	24.3	28.799999999999997
+46-47	26.3	24.0	23.150000000000002	26.55
+48-49	25.5	23.3	24.05	27.150000000000002
+50-51	27.55	22.75	23.7	26.0
+52-53	24.45	23.400000000000002	23.1	29.049999999999997
+54-55	27.450000000000003	21.85	23.0	27.700000000000003
+56-57	25.85	22.15	23.5	28.499999999999996
+58-59	24.05	22.75	25.6	27.6
+60-61	25.25	20.95	23.45	30.349999999999998
+62-63	27.3	21.9	23.7	27.1
+64-65	26.178535606820464	24.57372116349047	22.617853560682047	26.629889669007024
+66-67	25.7	23.75	22.05	28.499999999999996
+68-69	25.405679513184587	23.52941176470588	24.036511156186613	27.028397565922923
+70-71	25.159574468085104	23.085106382978722	23.138297872340424	28.617021276595743
+72-73	26.031065881092662	26.513122656668454	20.51419389394751	26.941617568291377
+74-75	25.197680548234054	26.56826568265683	21.929362150764366	26.304691618344755
+76-77	25.911812738160044	23.51660315732172	24.550898203592812	26.02068590092542
+78-79	26.16345062429058	22.985244040862657	21.793416572077184	29.05788876276958
+80-81	26.98324022346369	25.474860335195533	21.005586592178773	26.536312849162012
+82-83	26.46370023419204	24.355971896955502	22.131147540983605	27.049180327868854
+84-85	26.124567474048444	23.18339100346021	22.145328719723185	28.546712802768166
+86-87	25.976331360946748	25.443786982248522	22.36686390532544	26.21301775147929
+88-89	25.503742084052966	23.54634427173287	23.316062176165804	27.63385146804836
+90-91	23.832052040212893	21.525724423418097	25.901833234772326	28.74039030159669
+92-93	24.525139664804467	22.849162011173185	23.743016759776538	28.88268156424581
+94-95	25.161987041036717	24.028077753779698	22.4622030237581	28.347732181425485
+96-97	25.37393162393162	24.412393162393162	23.664529914529915	26.549145299145298
+98-99	25.67703109327984	23.620862587763288	22.71815446339017	27.9839518555667
 100	24.0	26.0	21.9	28.1
 >>END_MODULE
->>Per base GC content	fail
-#Base	%GC
-1	71.01303911735206
-2	64.1
-3	73.3
-4	65.3
-5	55.800000000000004
-6	87.3
-7	42.699999999999996
-8	42.8
-9	50.7
-10-14	50.46000000000001
-15-19	52.559999999999995
-20-24	53.300000000000004
-25-29	52.82
-30-34	54.279999999999994
-35-39	53.22
-40-44	53.26
-45-49	53.7
-50-54	54.24
-55-59	53.04
-60-64	54.70564677613135
-65-69	53.07723250201126
-70-74	52.834612105711855
-75-79	53.28499230938255
-80-84	54.29228998849251
-85-89	52.526657394529444
-90-94	53.509963768115945
-95-99	52.5820614676981
-100	52.1
->>END_MODULE
 >>Per sequence GC content	fail
 #GC Content	Count
 0	0.0
@@ -265,24 +330,51 @@ Sequence length	100
 7	0.0
 8	0.0
 9	0.0
-10-14	0.0
-15-19	0.0
-20-24	0.0
-25-29	0.0
-30-34	0.0
-35-39	0.0
-40-44	0.0
-45-49	0.0
-50-54	0.0
-55-59	0.0
-60-64	0.12
-65-69	0.5599999999999999
-70-74	6.16
-75-79	8.98
-80-84	13.100000000000001
-85-89	13.719999999999999
-90-94	11.68
-95-99	4.34
+10-11	0.0
+12-13	0.0
+14-15	0.0
+16-17	0.0
+18-19	0.0
+20-21	0.0
+22-23	0.0
+24-25	0.0
+26-27	0.0
+28-29	0.0
+30-31	0.0
+32-33	0.0
+34-35	0.0
+36-37	0.0
+38-39	0.0
+40-41	0.0
+42-43	0.0
+44-45	0.0
+46-47	0.0
+48-49	0.0
+50-51	0.0
+52-53	0.0
+54-55	0.0
+56-57	0.0
+58-59	0.0
+60-61	0.0
+62-63	0.0
+64-65	0.3
+66-67	0.0
+68-69	1.4000000000000001
+70-71	6.0
+72-73	6.65
+74-75	5.1499999999999995
+76-77	8.15
+78-79	11.899999999999999
+80-81	10.5
+82-83	14.6
+84-85	13.3
+86-87	15.5
+88-89	13.15
+90-91	15.45
+92-93	10.5
+94-95	7.3999999999999995
+96-97	6.4
+98-99	0.3
 100	0.0
 >>END_MODULE
 >>Sequence Length Distribution	pass
@@ -290,565 +382,85 @@ Sequence length	100
 100	1000.0
 >>END_MODULE
 >>Sequence Duplication Levels	pass
-#Total Duplicate Percentage	3.4
-#Duplication Level	Relative count
-1	100.0
-2	0.4140786749482402
-3	0.0
-4	0.0
-5	0.0
-6	0.0
-7	0.0
-8	0.0
-9	0.0
-10++	0.2070393374741201
+#Total Deduplicated Percentage	97.2
+#Duplication Level	Percentage of deduplicated	Percentage of total
+1	99.38271604938271	96.6
+2	0.411522633744856	0.8
+3	0.0	0.0
+4	0.0	0.0
+5	0.0	0.0
+6	0.0	0.0
+7	0.0	0.0
+8	0.0	0.0
+9	0.0	0.0
+>10	0.205761316872428	2.6
+>50	0.0	0.0
+>100	0.0	0.0
+>500	0.0	0.0
+>1k	0.0	0.0
+>5k	0.0	0.0
+>10k+	0.0	0.0
 >>END_MODULE
 >>Overrepresented sequences	fail
 #Sequence	Count	Percentage	Possible Source
-AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTAT	14	1.4000000000000001	TruSeq Adapter, Index 1 (97% over 36bp)
-GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATG	12	1.2	TruSeq Adapter, Index 1 (97% over 36bp)
+AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTAT	14	1.4000000000000001	TruSeq Adapter, Index 18 (97% over 37bp)
+GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATG	12	1.2	TruSeq Adapter, Index 18 (97% over 37bp)
 AGGGGGAATGATGGTTGTCTTTGGATATACTACAGCGATGGCTATTGAGG	2	0.2	No Hit
 GGCTTGTTTTATTTTAATGGCTGATCTATGTAATCACAGAGGCCAGTATG	2	0.2	No Hit
 GTGGGGTGGTGTTTGTGGGGGACTTCATCATCTCAGGCTTCCCAGGGTCC	2	0.2	No Hit
-CGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATGCCG	2	0.2	TruSeq Adapter, Index 1 (96% over 33bp)
+CGGAAGAGCACACGTCTGAACTCCAGTCACTTCCAAGATCTCGTATGCCG	2	0.2	TruSeq Adapter, Index 18 (97% over 34bp)
+>>END_MODULE
+>>Adapter Content	fail
+#Position	Illumina Universal Adapter	Illumina Small RNA Adapter	Nextera Transposase Sequence
+1	1.4	0.0	0.0
+2	1.4	0.0	0.0
+3	1.4	0.0	0.0
+4	1.4	0.0	0.0
+5	1.4	0.0	0.0
+6	1.4	0.0	0.0
+7	1.4	0.0	0.0
+8	1.4	0.0	0.0
+9	1.4	0.0	0.0
+10-11	1.4	0.0	0.0
+12-13	1.4	0.0	0.0
+14-15	1.4	0.0	0.0
+16-17	1.4	0.0	0.0
+18-19	1.4	0.0	0.0
+20-21	1.5	0.0	0.0
+22-23	1.5	0.0	0.0
+24-25	1.5	0.0	0.0
+26-27	1.5	0.0	0.0
+28-29	1.6	0.0	0.0
+30-31	1.7	0.0	0.0
+32-33	1.9	0.0	0.0
+34-35	2.4	0.0	0.0
+36-37	2.45	0.0	0.0
+38-39	2.95	0.0	0.0
+40-41	3.25	0.0	0.0
+42-43	3.75	0.0	0.0
+44-45	4.2	0.0	0.0
+46-47	4.9	0.0	0.0
+48-49	5.699999999999999	0.0	0.0
+50-51	6.300000000000001	0.0	0.0
+52-53	6.949999999999999	0.0	0.0
+54-55	7.65	0.0	0.0
+56-57	8.399999999999999	0.0	0.0
+58-59	9.350000000000001	0.0	0.0
+60-61	9.899999999999999	0.0	0.0
+62-63	10.600000000000001	0.0	0.0
+64-65	11.3	0.0	0.0
+66-67	12.0	0.0	0.0
+68-69	13.05	0.0	0.0
+70-71	13.6	0.0	0.0
+72-73	14.5	0.0	0.0
+74-75	15.55	0.0	0.0
+76-77	16.15	0.0	0.0
+78-79	17.2	0.0	0.0
+80-81	17.700000000000003	0.0	0.0
+82-83	18.15	0.0	0.0
+84-85	18.75	0.0	0.0
+86-87	19.799999999999997	0.0	0.0
+88	20.6	0.0	0.0
 >>END_MODULE
->>Kmer Content	fail
-#Sequence	Count	Obs/Exp Overall	Obs/Exp Max	Max Obs/Exp Position
-AAAAA	385	7.3597403	68.038994	65-69
-AGATC	435	5.4375157	23.135067	1
-GAAGA	375	5.258809	32.443344	6
-GGAAG	420	5.044668	33.345257	5
-TCCAG	475	4.8355613	14.131038	2
-AAGAG	320	4.487517	25.954676	7
-CCAGG	475	4.4180827	17.21471	3
-GAGCA	380	4.3399205	21.1377	9
-AGCAC	395	4.2895336	15.0741825	7
-CTCCA	415	4.0171337	12.105032	95-96
-AGAGC	340	3.883087	21.137697	8
-TTTTT	280	3.8749053	8.964593	10-14
-CTTCT	370	3.8646336	11.598914	55-59
-CTGAA	305	3.812511	13.130004	90-94
-CGGAA	320	3.65467	26.422123	5
-ACCAG	335	3.6379597	10.049457	7
-TCTGA	310	3.6325634	12.308498	90-94
-CACAC	340	3.5108058	14.806036	85-89
-ATCGG	325	3.4795394	24.768969	3
-TCGGA	320	3.426008	19.815174	3
-GATCG	320	3.426008	19.815174	1
-CGTCT	355	3.387832	11.578538	85-89
-CTGCT	355	3.387832	17.662533	3
-GCACA	310	3.3664696	15.0741825	8
-TCTTC	320	3.3423858	7.7326093	50-54
-CAGCA	305	3.3121717	10.049455	6
-GAACT	260	3.2500093	13.130004	90-94
-GTCTG	320	3.2116532	12.65067	90-94
-CAGGA	280	3.197836	15.8532715	3
-AACTC	265	3.1497202	23.781752	95-96
-TGAAC	250	3.125009	13.130004	90-94
-CCAGC	350	3.0954454	6.6359653	95-96
-AGTCA	240	3.0000086	10.41078	25-29
-CACCA	290	2.9945107	6.079907	70-74
-TGCTG	295	2.960743	9.2877	2
-CAGAT	230	2.875008	11.040063	70-74
-CTTCC	315	2.8583732	10.916445	30-34
-CACGT	280	2.8504362	12.351324	85-89
-CAGGG	290	2.8367646	22.630535	9
-ACACG	260	2.8234906	13.175687	85-89
-TTCCA	250	2.7855206	9.279795	30-34
-TTCTT	230	2.765239	6.6755276	50-54
-AGCAG	240	2.7410026	15.853272	2
-TTCTG	240	2.6363494	10.165324	55-59
-ACTCC	270	2.6135564	14.526036	95-96
-GCCAG	280	2.6043434	8.607355	1
-ACGTC	255	2.595933	10.105629	85-89
-GATCT	220	2.5779483	8.675031	40-44
-TCTGC	265	2.5289452	13.2469015	2
-AAGAT	160	2.4557784	12.783248	35-39
-ATCTC	220	2.4512577	9.279794	40-44
-CAGTC	240	2.4432309	8.554544	90-94
-TCCAA	205	2.4365761	10.999062	7
-CTTTT	200	2.4045558	16.688818	6
-TTCCT	230	2.40234	9.665762	7
-CCAGT	235	2.3923304	9.4206915	25-29
-TTTCT	195	2.3444414	16.688818	8
-CTGGG	255	2.3383298	6.004135	80-84
-TGCTT	210	2.3068056	10.165323	4
-TCTTT	190	2.284328	5.5629396	15-19
-TTTTC	190	2.2843277	11.125878	7
-GGGGG	255	2.2468696	16.307867	2
-AGGAA	160	2.2437584	19.466007	5
-GTCAC	220	2.2396283	10.184532	95-96
-TCACT	200	2.2284167	8.360176	95-96
-CACTT	200	2.2284167	10.3108835	30-34
-GAAAA	135	2.2103586	10.606119	60-64
-ACTTC	195	2.172706	9.279794	30-34
-TTGAA	150	2.1582448	11.9834385	60-64
-CTCCT	235	2.1324375	16.794533	4
-TCCTC	235	2.1324372	8.397265	5
-ATCTT	165	2.11616	7.1210704	10-14
-GGGGA	205	2.1089406	14.2801	3
-ACACA	165	2.092039	11.7331705	8
-TGCAG	195	2.0877237	9.907587	5
-GACCA	190	2.0633202	10.049455	6
-AGGGG	200	2.057503	9.520067	1
-CCTCC	260	2.049668	14.590484	5
-AGGAG	170	2.0418897	5.557543	2
-TCCTT	195	2.0367663	14.498643	4
-GTCTT	185	2.032186	15.247986	7
-GCTGG	220	2.0173824	8.485845	1
-CCAGA	185	2.0090222	5.3284492	70-74
-CCTGG	230	2.0054333	8.068818	3
-GCAGG	205	2.005299	9.052214	3
-GGACC	215	1.9997637	8.607355	5
-TTCAT	155	1.987908	5.934226	2
-CCTTT	190	1.9845415	14.498643	5
-TTTCC	190	1.9845415	5.799457	15-19
-TGGCA	185	1.980661	14.861383	2
-TCTTG	180	1.977262	10.165323	5
-CCAAG	180	1.9547247	9.044511	35-39
-CTTCA	175	1.9498644	10.310883	6
-CAAGA	145	1.933477	12.339583	35-39
-CTGGA	180	1.9271295	9.907587	6
-GGCTG	210	1.9256833	16.97169	2
-AATGA	125	1.918577	7.677627	95-96
-TGAAA	125	1.918577	15.623971	60-64
-GCTTC	200	1.9086379	13.2469015	2
-GTCCA	185	1.8833237	14.131036	1
-AGAAA	115	1.882898	7.5757995	7
-TGGGG	195	1.8805519	13.386638	1
-TTCTC	180	1.880092	5.799457	25-29
-CTTGA	160	1.8748715	8.675031	60-64
-ACAAA	120	1.8682072	5.762797	40-44
-TCTCG	195	1.8609219	8.831266	5
-GGGAC	190	1.8585701	9.052216	5
-TGAGG	165	1.8578365	5.209824	2
-TGAAG	140	1.8404517	6.082693	2
-CATCT	165	1.8384434	5.155441	4
-CACTG	180	1.8324232	9.4206915	6
-CTGCA	180	1.8324231	5.3465896	90-94
-GCTGC	210	1.8310483	8.068819	1
-GCAGA	160	1.8273348	10.568848	3
-CCTTC	200	1.8148402	8.397265	9
-AGGGA	150	1.8016673	6.0081544	95-96
-TTTCA	140	1.7955297	7.1210704	15-19
-CACAG	165	1.7918309	5.432139	95-96
-AAACA	115	1.7903653	7.6389136	70-74
-ATTTT	120	1.7715117	13.661307	6
-TTTTG	140	1.7701824	17.551357	7
-GGGGC	210	1.7594293	11.629828	3
-GATTT	130	1.7534488	12.481857	6
-CAAAT	120	1.7513192	6.7527947	50-54
-GAGGG	170	1.7488776	9.520067	1
-GAAGG	145	1.7416117	6.0081544	95-96
-CATTT	135	1.7314036	5.9342256	5
-ATTTC	135	1.7314036	5.9342256	7
-CCTCT	190	1.7240983	8.397266	1
-ATCCA	145	1.7234317	5.49953	4
-GCAGC	185	1.7207267	6.9789357	95-96
-TCCTG	180	1.717774	13.2469	2
-CTCTG	180	1.717774	13.2469	2
-AAAAC	110	1.7125233	7.6389136	70-74
-CTTGG	170	1.7061908	9.2877	2
-AAAAT	95	1.7024158	8.291661	9
-TCACC	175	1.693972	8.957724	8
-TCCAC	175	1.693972	8.957724	5
-GAGAA	120	1.6828189	6.488669	6
-TCTCC	185	1.6787271	5.038359	55-59
-GAGCC	180	1.6742208	8.607355	9
-TCATC	150	1.6713123	5.1554413	2
-AGACA	125	1.6667906	6.169792	2
-TGATG	135	1.6636823	11.404236	9
-GGGAG	160	1.6460025	9.520067	1
-AGCCA	150	1.6289369	6.029673	10-14
-ATGCC	160	1.6288207	8.478622	45-49
-CTCGT	170	1.6223421	8.831266	3
-GAGGA	135	1.6215005	11.115086	3
-TGTTG	140	1.6173534	10.690706	2
-CTCAT	145	1.6156021	5.1554418	2
-CAGGT	150	1.6059413	9.907587	4
-GCTTG	160	1.6058266	9.2877	60-64
-GGGTC	175	1.6047363	12.728768	2
-TCATT	125	1.6031516	5.934226	9
-GTTGA	130	1.6020645	5.702118	1
-ACAGA	120	1.6001189	10.005068	95-96
-GGAGG	155	1.5945649	9.520067	2
-GGGGT	165	1.5912362	13.386638	1
-TGGGA	140	1.5763463	10.419649	2
-GGATG	140	1.5763462	15.629472	6
-GCCTC	190	1.575248	7.672287	2
-CCTGC	190	1.5752479	11.508429	2
-GCTCC	190	1.5752479	11.508429	6
-TCTCT	150	1.5667434	5.224736	95-96
-GGGAA	130	1.561445	11.115086	4
-TCCAT	140	1.5598917	10.3108835	8
-GGCTT	155	1.5556445	13.93155	1
-TTGAT	115	1.5511277	6.240928	4
-CATCA	130	1.5451456	5.49953	2
-AGAGA	110	1.542584	6.488669	9
-AGGAC	135	1.541814	6.341309	55-59
-GTATG	125	1.5404466	9.123388	45-49
-AACAT	105	1.5324043	13.5055895	9
-AGCTC	150	1.5270194	9.4206915	5
-TTTGT	120	1.5172992	17.551357	8
-GATGA	115	1.5117996	6.082693	5
-GAGAT	115	1.5117996	6.082693	4
-AGGAT	115	1.5117996	12.165386	4
-TGAGA	115	1.5117996	6.082693	5
-CTGGT	150	1.5054625	9.2877	4
-GCTGT	150	1.5054625	18.5754	3
-TTCAC	135	1.504181	10.310883	7
-CCCAG	170	1.5035021	12.276537	2
-CAGTG	140	1.4988785	9.907587	5
-CTCCC	190	1.4978343	7.295242	1
-CCCTG	180	1.4923402	11.5084305	2
-CAGAG	130	1.4847097	7.398194	20-24
-CTTTG	135	1.4829465	10.165323	2
-CAAAA	95	1.4789973	7.203496	9
-TCTCA	130	1.4484707	5.1554413	8
-GAATG	110	1.4460692	12.165386	7
-GGAAT	110	1.4460692	12.165386	5
-TTTGG	125	1.4440656	5.345353	7
-GGCCT	165	1.4386805	12.103227	1
-GCTCT	150	1.4314783	6.1818867	20-24
-TCTGT	130	1.4280226	15.247986	3
-CTGTT	130	1.4280226	15.247986	4
-AGGTT	115	1.4172109	11.404235	8
-TTGAG	115	1.4172107	5.702117	4
-TTTGA	105	1.416247	7.4891143	10-14
-ATCTG	120	1.4061534	5.4218936	2
-GGTCT	140	1.4050984	9.287701	6
-TTTTA	95	1.4024467	7.384491	95-96
-GGGTG	145	1.3983592	13.386638	2
-GGCAC	150	1.3951839	8.607355	4
-AAAGA	85	1.3917071	7.5757985	8
-AAGAA	85	1.3917071	5.254889	75-79
-TTGTT	110	1.3908576	5.850453	4
-GGAGA	115	1.3812783	5.557543	3
-ATGAC	110	1.3750039	6.252721	95-96
-TGTTC	125	1.3730987	10.165325	5
-GGGCA	140	1.3694727	9.052216	4
-ATGAT	95	1.3668885	6.6574664	6
-CCACT	140	1.3551775	5.3746343	30-34
-TGGCT	135	1.3549163	13.931552	3
-GATGG	120	1.3511539	10.419648	9
-TCGTA	115	1.3475639	5.421894	40-44
-TGTCA	115	1.3475639	5.421894	5
-GCTGA	125	1.3382844	9.907587	6
-CAGAA	100	1.3334324	5.6025352	90-94
-CCAAA	105	1.3312978	5.8665853	8
-GGGCT	145	1.3296387	12.728768	1
-TAGGA	100	1.3146083	12.165386	4
-GACAG	115	1.313397	5.2844243	1
-GGTCC	150	1.3078917	8.068819	6
-CCATC	135	1.3067783	8.957724	9
-AAATG	85	1.3046323	7.101804	6
-TTCAA	95	1.2997144	6.330293	9
-CGTAT	110	1.2889742	8.675031	45-49
-TGACT	110	1.2889742	5.421894	3
-TATGC	110	1.2889739	8.67503	45-49
-GCCCT	155	1.2850707	7.672287	3
-TGGGC	140	1.283789	8.485846	7
-ACTTT	100	1.2825212	5.9342256	1
-ATGTT	95	1.2813665	6.2409286	1
-ATTTG	95	1.2813663	12.481856	9
-TGGTT	110	1.2707777	5.345353	5
-TGGTG	120	1.2666163	9.767722	7
-GTTTT	100	1.2644161	5.8504534	6
-GCCTG	145	1.2642952	12.103229	1
-TTGCT	115	1.2632507	6.0991945	50-54
-CCACC	150	1.2614243	7.7821474	5
-GGACA	110	1.2562928	15.853274	6
-GAAGC	110	1.2562928	10.568849	9
-TGACA	100	1.2500036	5.7837667	9
-GACAT	100	1.2500035	11.567533	7
-TGGAA	95	1.248878	6.082693	5
-ACAGC	115	1.2488517	10.049455	5
-AATCC	105	1.2480024	5.499531	7
-TGCCT	130	1.2406145	8.831266	3
-AGGTG	110	1.2385577	5.209824	4
-GTGGC	135	1.2379395	12.728768	1
-CATGT	105	1.2303842	5.4218936	1
-TAGAT	85	1.2230055	6.0453725	90-94
-CCCTC	155	1.2219174	7.295242	4
-GCCGT	140	1.2206988	8.068819	3
-AGTTT	90	1.2139261	6.2409286	7
-TTTAG	90	1.213926	6.240928	8
-TTGGG	115	1.2138406	9.767722	2
-ACCTC	125	1.20998	8.957724	1
-AGCAA	90	1.2000892	6.169792	9
-CAAAG	90	1.2000891	6.169791	5
-AAAGC	90	1.2000891	6.169791	6
-ACAGG	105	1.1991886	10.568849	8
-AGGCA	105	1.1991886	5.712891	95-96
-ATCAG	95	1.1875033	5.7837663	6
-ATGAG	90	1.1831475	6.082693	25-29
-CAGTT	100	1.1717947	5.1698627	85-89
-ATGCT	100	1.1717947	5.421894	8
-TCAAT	85	1.1629024	6.3302937	10-14
-TGTGT	100	1.1552525	10.690706	3
-GCCCA	130	1.1497369	12.276536	1
-TGATT	85	1.1464858	12.481857	5
-TGCTC	120	1.1451827	8.831267	4
-TGTCC	120	1.1451827	13.2469015	2
-TCCCC	145	1.143084	7.295242	2
-AAGGC	100	1.1420842	5.493164	65-69
-CAACA	90	1.1411123	5.8665853	8
-CACAA	90	1.1411123	11.7331705	9
-ACATC	95	1.129145	5.4995303	8
-AAGCT	90	1.1250031	6.2527194	95-96
-GAAAG	80	1.1218792	12.977338	7
-AAGGA	80	1.1218792	6.488669	3
-GCACT	110	1.1198142	9.4206915	5
-CCTGA	110	1.119814	9.420691	9
-ACCTT	100	1.1142083	5.1554418	7
-GTCAT	95	1.113205	5.421894	1
-TGATC	95	1.113205	10.843788	5
-TCATG	95	1.113205	5.421894	3
-TGGAT	90	1.1091216	5.702118	9
-GTGGG	115	1.1090435	8.924425	1
-CTGTG	110	1.1040058	9.2877	4
-GCTTT	100	1.0984789	5.4947696	95-96
-TGTCT	100	1.0984789	10.165323	5
-TTGGT	95	1.0974898	5.345353	4
-CTGTC	115	1.0974668	17.662535	4
-CAGAC	100	1.0859579	5.0247273	5
-GGAAC	95	1.0849801	5.2844243	6
-CCTCG	130	1.0778012	7.672287	6
-GCGGC	135	1.075477	7.372196	1
-ATAAA	60	1.0752101	8.291662	7
-GGGAT	95	1.0696635	10.419649	3
-CATCC	110	1.0647823	8.957723	3
-ACAGT	85	1.062503	5.7837663	4
-ACTGA	85	1.062503	11.567533	7
-GTTGG	100	1.0555136	9.767722	1
-TGTGG	100	1.0555136	9.767722	5
-GGAAA	75	1.0517617	19.466007	6
-GTGAA	80	1.0516868	6.082693	1
-GAAGT	80	1.0516866	6.082693	5
-GTCTC	110	1.0497508	8.831267	1
-CGGCT	120	1.046313	8.068818	1
-TTTAT	70	1.0333818	5.4645233	10-14
-GACAC	95	1.0316601	10.049455	7
-GGCAA	90	1.0278759	10.56885	3
-TCATA	75	1.0260904	6.330293	5
-ATTCA	75	1.0260903	6.3302927	7
-TAACA	70	1.0216029	6.7527957	8
-GGTCA	95	1.0170963	9.907589	3
-ATGGC	95	1.0170962	9.907587	1
-TCAGG	95	1.0170962	9.907587	8
-GGTGA	90	1.0133655	15.629474	3
-TGTTT	80	1.0115329	5.8504534	5
-TGAAT	70	1.007181	6.6574664	5
-ATTGA	70	1.0071809	6.6574664	7
-AAGTT	70	1.0071809	6.6574664	6
-TTGCC	105	1.0020349	8.831267	2
-CTTGC	105	1.0020349	8.831267	6
-GCAAA	75	1.0000744	6.169792	4
-CATAG	80	1.0000029	6.2527204	95-96
-GACTT	85	0.99602544	5.421894	1
-CTGAT	85	0.99602544	5.421894	4
-CTTGT	90	0.988631	10.165323	3
-AATGG	75	0.98595625	6.082693	8
-AAGGT	75	0.9859562	6.0826926	4
-GATGT	80	0.98588586	5.7021174	7
-GGATT	80	0.98588586	11.404235	5
-GGCGG	115	0.96349704	7.753219	1
-AGAGG	80	0.9608892	5.557543	8
-GAGGT	85	0.95706743	5.2098246	3
-ATGGG	85	0.9570673	5.209824	1
-CCGTC	115	0.95343953	7.672287	4
-TAGCA	75	0.9375027	5.7837667	1
-ACATG	75	0.9375026	5.7837663	2
-TTGCA	80	0.93743575	5.421894	4
-GTTCA	80	0.93743575	5.421894	6
-ATGTC	80	0.93743575	5.421894	5
-TTCAG	80	0.93743575	5.421894	8
-TTGAC	80	0.9374356	5.4218936	2
-GTTCT	85	0.93370706	5.0826616	1
-TTGTC	85	0.93370706	5.0826616	9
-TTTGC	85	0.93370706	5.0826616	3
-ATGGT	75	0.924268	5.7021174	4
-ATGAA	60	0.920917	7.1018047	9
-AGATG	70	0.92022586	6.082693	5
-GCTCA	90	0.91621155	5.092265	95-96
-AGTGC	85	0.9100334	9.907587	2
-AGGGT	80	0.90076935	10.419649	1
-GTAGG	80	0.90076923	10.419648	6
-AGTGG	80	0.90076923	5.209824	2
-TAAAA	50	0.89600843	8.291662	8
-CACAT	75	0.89143026	5.499531	6
-CCATT	80	0.89136666	10.3108835	9
-ATACT	65	0.8892783	6.330293	9
-ACATT	65	0.88927823	6.3302927	7
-GCGGG	105	0.87971467	7.753219	2
-ACACC	85	0.8777014	9.555587	9
-CATAA	60	0.8756596	6.7527947	6
-ACCCT	90	0.8711856	13.436585	1
-GAACA	65	0.8667311	6.169792	7
-ACTGC	85	0.8653109	5.092265	95-96
-GGTAT	70	0.86265016	17.106354	6
-AGTTG	70	0.86265016	5.702118	7
-GAGAC	75	0.85656327	5.2844243	1
-GTGTC	85	0.8530954	13.93155	1
-GTTGC	85	0.8530954	9.2877	1
-ATAGA	55	0.84417385	7.1018047	8
-GAAAT	55	0.84417385	7.1018047	5
-CATTC	75	0.83565605	5.155441	6
-TCACA	70	0.83200157	5.499531	3
-TGCGG	90	0.8252928	8.485845	3
-GCATT	70	0.8202563	5.421894	4
-GAACC	75	0.8144686	5.0247283	6
-CTCGA	80	0.81441027	9.420691	6
-GAATC	65	0.8125023	5.7837667	6
-TACAG	65	0.81250226	11.567533	7
-TGGTA	65	0.80103225	11.404236	5
-AAGAC	60	0.80005944	6.169791	8
-CAAGG	70	0.7994591	5.2844243	2
-ATGTA	55	0.7913565	6.6574664	4
-AATGT	55	0.7913565	6.6574664	3
-CGGCA	85	0.7906042	8.607354	2
-GAGAG	65	0.7807225	5.557543	8
-ACCAT	65	0.7725729	5.499531	8
-TTCTA	60	0.7695128	5.934226	9
-TAGAA	50	0.7674308	7.1018047	9
-GCATC	75	0.7635097	9.4206915	1
-GTTCC	80	0.76345515	8.831267	6
-AGCTT	65	0.76166654	5.421894	1
-TTAGC	65	0.76166654	5.421894	9
-CTGTA	65	0.76166654	5.421894	2
-ACTTG	65	0.7616664	5.4218936	2
-GTGCT	75	0.7527313	9.287701	3
-ATCAT	55	0.7524662	6.3302927	3
-GTTTG	65	0.7509141	5.345353	9
-GTGTT	65	0.7509141	10.690706	1
-GTCAA	60	0.75000215	11.5675335	6
-AATGC	60	0.75000215	6.252721	95-96
-CAAGT	60	0.7500021	5.7837663	9
-GCAAT	60	0.7500021	5.7837663	4
-GCAAG	65	0.74235487	5.2844243	1
-AGTGT	60	0.7394144	5.7021174	1
-TTAGG	60	0.7394144	5.702118	7
-AGCGG	75	0.73364604	9.052214	1
-ATCCT	65	0.72423524	5.155441	4
-ACTCT	65	0.72423524	5.155441	9
-AGTGA	55	0.7230346	6.082693	6
-AATAA	40	0.71680677	8.291662	6
-AACCT	60	0.71314424	5.4995303	1
-ATTCT	55	0.70538664	5.9342256	7
-AGTCT	60	0.7030768	5.421894	3
-GTGCA	65	0.69590795	9.907589	6
-AAAGT	45	0.69068766	7.101804	8
-AACTG	55	0.6875019	5.7837663	1
-CGAAG	60	0.68525064	5.2844243	4
-GATTG	55	0.67779654	5.702118	6
-GTGAT	55	0.67779654	11.404236	4
-TGTTA	50	0.67440337	12.481857	5
-TTGTA	50	0.6744033	6.240928	9
-TATTG	50	0.6744033	6.240928	7
-CTCTA	60	0.6685249	5.1554413	7
-TACCT	60	0.66852486	10.310882	8
-ATGGA	50	0.65730417	6.082693	8
-ATACA	45	0.6567447	6.7527957	6
-ATCAA	45	0.65674466	6.7527947	9
-TGTAA	45	0.6474735	6.6574664	7
-GCGGT	70	0.6418945	8.485846	4
-GGCCG	80	0.63731974	7.372196	2
-GGTTT	55	0.63538885	10.690706	9
-TTGTG	55	0.63538885	5.345353	1
-TATAT	40	0.62991583	7.2865515	8
-CCTGT	65	0.62030727	8.831266	3
-GTGAG	55	0.6192789	5.2098246	1
-TAGGG	55	0.61927885	5.209824	8
-GAGTT	50	0.6161787	5.7021174	6
-ATGTG	50	0.6161787	5.702118	2
-GAATA	40	0.61394465	7.1018047	6
-CTGCG	70	0.6103493	8.068818	2
-CGGTG	65	0.59604484	8.485845	2
-TAAGG	45	0.5915738	6.082693	9
-AAGTG	45	0.5915737	6.0826926	1
-TATTT	40	0.5905039	6.8306537	8
-GGCAT	55	0.5888452	14.861383	3
-GTATC	50	0.5858973	5.421894	4
-ATAAC	40	0.5837731	13.505591	7
-TTACT	45	0.57713455	5.934226	9
-GTATA	40	0.575532	13.314933	7
-GAGTG	50	0.5629808	5.209824	1
-GTACA	45	0.5625016	5.7837667	6
-ATAGC	45	0.5625016	5.7837667	9
-TCTAC	50	0.5571041	5.1554413	8
-GCGAG	55	0.53800714	9.052216	1
-ACGGG	55	0.5380071	9.052214	1
-GATAA	35	0.5372016	7.1018047	6
-AATAG	35	0.5372016	7.101805	7
-CAACT	45	0.53485817	5.4995303	6
-CATAC	45	0.53485817	5.4995303	5
-GATTC	45	0.52730757	5.421894	6
-AGGTA	40	0.5258433	12.165386	5
-CGGTC	60	0.52315664	8.068819	5
-ACGAG	45	0.51393795	5.2844243	7
-TATTC	40	0.5130085	5.9342256	7
-CTAAA	35	0.51080143	6.7527957	9
-TACAA	35	0.51080143	5.402236	35-39
-CCTTA	45	0.5013937	5.1554413	6
-CAGTA	40	0.50000143	5.7837667	4
-GTGTA	40	0.49294293	5.702118	4
-TAACT	35	0.47884214	6.330293	8
-CTTAA	35	0.47884214	6.330293	7
-CTATA	35	0.47884214	6.330293	4
-TTAAC	35	0.47884214	6.330293	8
-TATCA	35	0.4788421	6.3302927	5
-TCAAC	40	0.47542948	5.499531	7
-ACTCA	40	0.47542942	5.49953	8
-TTAGT	35	0.47208238	10.120425	95-96
-TGTAT	35	0.47208238	6.2409286	3
-ATTGT	35	0.47208235	6.240928	8
-GTTAC	40	0.46871787	5.421894	6
-TGTAC	40	0.46871787	10.843788	7
-AGAGT	35	0.46011293	6.082693	5
-AGTAG	35	0.46011293	6.082693	5
-CTCCG	55	0.45599285	7.672287	6
-GGTAG	40	0.45038468	5.2098246	2
-TTTAC	35	0.44888243	5.9342256	8
-CTACT	40	0.44568333	5.1554418	4
-AACTA	30	0.4378298	6.7527947	9
-TATAG	30	0.43164897	6.6574664	5
-ATATA	25	0.4199739	7.7728767	9
-CTCAA	35	0.41600078	5.499531	9
-TATAC	30	0.4104361	6.3302927	5
-ACTAT	30	0.4104361	6.3302927	6
-TACTA	30	0.4104361	6.3302927	5
-TCGAT	35	0.41012815	10.843788	7
-ACGTT	35	0.41012815	5.421894	4
-CGAAA	30	0.40002972	6.169792	9
-GTAAG	30	0.3943825	6.082693	8
-ATAGG	30	0.3943825	6.082693	3
-TCCTA	35	0.38997287	5.1554413	5
-TTACC	35	0.38997287	5.1554413	7
-ACCGA	35	0.3800853	5.0247273	7
-GCATA	30	0.37500107	5.7837667	1
-TCGAA	30	0.37500107	5.7837667	4
-GCTAA	30	0.37500107	5.7837667	8
-TAGGT	30	0.3697072	5.7021174	7
-GTTAG	30	0.3697072	5.702118	6
-CAATA	25	0.36485815	6.7527947	5
-ATACC	30	0.35657212	5.499531	6
-GACGA	30	0.3426253	5.284424	6
-AAGCG	30	0.3426253	10.568848	7
-GTTTA	25	0.33720168	6.2409286	7
-GTATT	25	0.33720168	12.481857	6
-AGATA	20	0.30697232	7.1018047	5
-CGTCA	30	0.30540386	9.420691	5
-CCTAA	25	0.29714343	5.499531	7
-TACCA	25	0.2971434	5.49953	9
-TGCTA	25	0.29294866	5.421894	7
-TACGT	25	0.29294863	5.4218936	9
-AGACG	25	0.2855211	5.284425	9
-CCTAT	25	0.2785521	5.1554418	3
-TAAGC	20	0.25000072	5.7837667	9
-CTAAG	20	0.25000072	5.7837667	8
-CGATT	20	0.23435894	5.421894	9
-GGGTA	20	0.22519234	5.2098246	2
-ACGCA	20	0.21719159	5.0247273	5
-GCGAA	15	0.17131266	5.284425	3
-CGAAC	15	0.16289368	5.0247273	5
+>>Kmer Content	pass
 >>END_MODULE
diff --git a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2b537d9767cbc1ddbd9f2e528a1c122dfe973d7c
--- /dev/null
+++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/CutadaptTest.scala
@@ -0,0 +1,84 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.pipelines.flexiprep
+
+import java.io.File
+
+import org.testng.annotations.Test
+
+class CutadaptTest extends FastqcV0101Test {
+  /** Mock stats output of a Cutadapt 1.9 run; assigned to `statsOutput` and parsed by the tests below */
+  private[flexiprep] val cutadaptOut: File = resourceFile("ct-test.R1.clip.stats")
+
+  def testFastQCinstance: Fastqc = {
+    val fqc = new Fastqc(null)
+    fqc.output = outputv0101
+    fqc.contaminants = Option(resourceFile("fqc_contaminants_v0112.txt"))
+    // NOTE(review): the fqc.beforeGraph() call is left disabled here; confirm the tests do not depend on it
+    fqc
+  }
+
+  def testCutadaptInst: Cutadapt = {
+    val caExe = new Cutadapt(null, testFastQCinstance)
+    caExe.statsOutput = cutadaptOut
+    caExe
+  }
+
+  @Test def testAdapterFound() = {
+    val cutadapt = testCutadaptInst
+    val adapters = cutadapt.extractClippedAdapters(cutadaptOut)
+    adapters.keys.size shouldBe 4 // one entry per adapter; keys are the adapter sequences looked up below
+
+    adapters.get("CAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC") shouldBe Some(
+      Map(
+        "count" -> 94,
+        "histogram" -> Map(
+          "5p" -> Map(5 -> 2, 6 -> 4, 9 -> 1, 3 -> 8, 4 -> 3),
+          "3p" -> Map(5 -> 21, 6 -> 18, 9 -> 1, 12 -> 1, 7 -> 2, 3 -> 13, 11 -> 1, 4 -> 19)
+        )
+      )
+    )
+
+    adapters.get("CAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCAGACGTGTGCTCTTCCGATC") shouldBe Some(
+      Map(
+        "count" -> 0,
+        "histogram" -> Map()
+      )
+    )
+  }
+
+  @Test def testSummary() = {
+    val cutadapt = testCutadaptInst
+    val summary = cutadapt.summaryStats
+
+    summary.keys shouldBe Set("num_bases_input", "num_reads_input", "num_reads_output",
+      "num_reads_with_adapters", "num_reads_affected", "num_reads_discarded_too_long",
+      "adapters", "num_reads_discarded_many_n", "num_reads_discarded_too_short", "num_bases_output")
+
+    summary.keys.size shouldBe 10 // already implied by the Set comparison above; kept as an explicit count check
+    summary("adapters").asInstanceOf[Map[String, Map[String, Any]]].keys.size shouldBe 4
+
+    summary("num_bases_input") shouldBe 100000
+    summary("num_reads_input") shouldBe 1000
+    summary("num_reads_output") shouldBe 985
+    summary("num_reads_with_adapters") shouldBe 440
+    summary("num_reads_affected") shouldBe 425
+    summary("num_reads_discarded_too_long") shouldBe 0
+    summary("num_reads_discarded_many_n") shouldBe 0
+    summary("num_reads_discarded_too_short") shouldBe 15
+    summary("num_bases_output") shouldBe 89423
+  }
+}
diff --git a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala
index 4cb68fdfc44d5a30c3ed76aabc9570d6f62529f3..3cf24e8c60a570e8e51fe528ece4f81d0b66a01a 100644
--- a/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala
+++ b/flexiprep/src/test/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FastqcV0101Test.scala
@@ -25,14 +25,14 @@ import org.testng.annotations.Test
 class FastqcV0101Test extends TestNGSuite with Matchers {
 
   /** Returns the absolute path to test resource directory as a File object */
-  private val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString)
+  private[flexiprep] val resourceDir: File = new File(Paths.get(getClass.getResource("/").toURI).toString)
 
-  /** Given a resource file name, returns the the absolute path to it as a File object */
+  /** Given a resource file name, returns the absolute path to it as a File object */
-  private def resourceFile(p: String): File = new File(resourceDir, p)
+  private[flexiprep] def resourceFile(p: String): File = new File(resourceDir, p)
 
   /** Mock output file of a FastQC v0.10.1 run */
   // the file doesn't actually exist, we just need it so the outputDir value can be computed correctly
-  private val outputv0101: File = resourceFile("v0101.fq_fastqc.zip")
+  private[flexiprep] val outputv0101: File = resourceFile("v0101.fq_fastqc.zip")
 
   @Test def testOutputDir() = {
     val fqc = new Fastqc(null)
@@ -44,7 +44,7 @@ class FastqcV0101Test extends TestNGSuite with Matchers {
     val fqc = new Fastqc(null)
     fqc.output = outputv0101
-    // 11 QC modules
+    // 12 QC modules
-    fqc.qcModules.size shouldBe 11
+    fqc.qcModules.size shouldBe 12
     // first module
     fqc.qcModules.keySet should contain("Basic Statistics")
     // mid (6th module)
@@ -83,4 +83,23 @@ class FastqcV0101Test extends TestNGSuite with Matchers {
     adapters.last.seq shouldEqual "GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG"
 
   }
+
+  @Test def testPerBaseSequenceQuality() = {
+    val fqc = new Fastqc(null)
+    fqc.output = outputv0101
+
+    val perBaseSequenceQuality = fqc.perBaseSequenceQuality
+    perBaseSequenceQuality.size shouldBe 55
+    perBaseSequenceQuality.keys should contain("54-55")
+  }
+
+  @Test def testPerBaseSequenceContent() = {
+    val fqc = new Fastqc(null)
+    fqc.output = outputv0101
+
+    val perBaseSequenceContent: Map[String, Map[String, Double]] = fqc.perBaseSequenceContent
+    perBaseSequenceContent.size shouldBe 55
+    perBaseSequenceContent.keys should contain("1")
+  }
+
 }
\ No newline at end of file
diff --git a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala
index 6e2aa683f8e6e2abe31e2e8307d71db8c41c5258..d7c40fb76197f77ddb944803c113b65cf124a0bf 100644
--- a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala
+++ b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala
@@ -30,8 +30,8 @@ import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary
 import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx
 import nl.lumc.sasc.biopet.utils.ConfigUtils
 import org.broadinstitute.gatk.queue.QScript
-import scala.language.reflectiveCalls
 
+import scala.language.reflectiveCalls
 import scala.collection.JavaConversions._
 
 class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript {
@@ -173,7 +173,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript
 
         genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri =>
           val cv = new CombineVariants(this)
-          cv.reference = fastaFile
+          cv.reference_sequence = fastaFile
           cv.deps ::= createDict.output
           def addDownload(uri: String): Unit = {
             val curl = new Curl(this)
@@ -181,7 +181,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript
             curl.output = new File(annotationDir, new File(curl.url).getName)
             curl.isIntermediate = true
             add(curl)
-            cv.inputFiles ::= curl.output
+            cv.variant :+= curl.output
 
             val tabix = new Tabix(this)
             tabix.input = curl.output
@@ -198,7 +198,7 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript
             case _                    => addDownload(dbsnpUri.toString)
           }
 
-          cv.outputFile = new File(annotationDir, "dbsnp.vcf.gz")
+          cv.out = new File(annotationDir, "dbsnp.vcf.gz")
           add(cv)
         }
 
diff --git a/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala
index b07d295a88d9044f351f738320c23692f53711bf..8f470ee063d85c5c4b14e6e261c4cc1cb323ff2d 100644
--- a/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala
+++ b/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala
@@ -29,6 +29,7 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
 
   /**
    * Method to add a bamFile to the pipeline
+   *
    * @param id Unique id used for this bam file, most likely to be a sampleName
    * @param file Location of the bam file
    */
@@ -51,6 +52,8 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
     require(bamFiles.nonEmpty)
   }
 
+  lazy val mergeCountFiles: Boolean = config("merge_count_files", default = true)
+
   private var extraSummaryFiles: Map[String, File] = Map()
 
   def addMergeTableJob(countFiles: List[File],
@@ -58,18 +61,22 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
                        name: String,
                        fileExtension: String,
                        args: MergeArgs = mergeArgs): Unit = {
-    add(MergeTables(this, countFiles, outputFile,
-      args.idCols, args.valCol, args.numHeaderLines, args.fallback, fileExtension = Some(fileExtension)))
-    extraSummaryFiles += s"${name}_table" -> outputFile
+    if (mergeCountFiles) {
+      add(MergeTables(this, countFiles, outputFile,
+        args.idCols, args.valCol, args.numHeaderLines, args.fallback, fileExtension = Some(fileExtension)))
+      extraSummaryFiles += s"${name}_table" -> outputFile
+    }
   }
 
   def addHeatmapJob(countTable: File, outputFile: File, name: String, countType: Option[String] = None): Unit = {
-    val job = new PlotHeatmap(qscript)
-    job.input = countTable
-    job.output = outputFile
-    job.countType = countType
-    add(job)
-    extraSummaryFiles += s"${name}_heatmap" -> outputFile
+    if (mergeCountFiles) {
+      val job = new PlotHeatmap(qscript)
+      job.input = countTable
+      job.output = outputFile
+      job.countType = countType
+      add(job)
+      extraSummaryFiles += s"${name}_heatmap" -> outputFile
+    }
   }
 
   /** Must return a map with used settings for this pipeline */
diff --git a/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala
index afc6bbdc3ba737db63f5c4270009de0a60b8deaa..d2303ac3e014110652af209dbaee180565405ca0 100644
--- a/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala
+++ b/gwas-test/src/main/scala/nl/lumc/sasc/biopet/pipelines/gwastest/GwasTest.scala
@@ -82,10 +82,10 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
             gensToVcf.outputVcf = new File(outputDirGens, gen._1.genotypes.getName + s".${gen._2}.vcf.gz")
             gensToVcf.isIntermediate = true
             add(gensToVcf)
-            cvChr.inputFiles :+= gensToVcf.outputVcf
+            cvChr.variant :+= gensToVcf.outputVcf
           }
           add(cvChr)
-          cvTotal.inputFiles :+= cvChr.outputFile
+          cvTotal.variant :+= cvChr.outputFile
           contig -> cvChr.outputFile
       }
       add(cvTotal)
@@ -105,14 +105,14 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
         bedFile.deleteOnExit()
 
         val sv = new SelectVariants(this)
-        sv.inputFiles :+= chrVcfFiles.getOrElse(region.chr, vcfFile)
-        sv.outputFile = new File(regionDir, s"$name.vcf.gz")
+        sv.variant = chrVcfFiles.getOrElse(region.chr, vcfFile)
+        sv.out = new File(regionDir, s"$name.vcf.gz")
         sv.intervals :+= bedFile
         sv.isIntermediate = true
         add(sv)
 
         val snptest = new Snptest(this)
-        snptest.inputGenotypes :+= sv.outputFile
+        snptest.inputGenotypes :+= sv.out
         snptest.inputSampleFiles :+= phenotypeFile
         snptest.outputFile = Some(new File(regionDir, s"$name.snptest"))
         add(snptest)
@@ -127,7 +127,7 @@ class GwasTest(val root: Configurable) extends QScript with BiopetQScript with R
       }
 
     val cv = new CatVariants(this)
-    cv.inputFiles = snpTests.map(_._2).toList
+    cv.variant = snpTests.map(_._2).toList
     cv.outputFile = new File(outputDir, "snptest" + File.separator + "snptest.vcf.gz")
     add(cv)
   }
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala
index 7bebb491a55a52b7b77ce8141f11f162ab7eb643..ed0e1318d96c615346172b5e1add0df6dc4476d0 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/Shiva.scala
@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva
 
 import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
 import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
-import nl.lumc.sasc.biopet.extensions.gatk.broad._
+import nl.lumc.sasc.biopet.extensions.gatk._
 import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
 import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
 import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala
index b2055a09f725f8a67321cbad36c5bea05a94bb13..4f84d061198ee9449811cd414c72e661ee1e501b 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcalling.scala
@@ -26,6 +26,7 @@ import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.{ VarscanCnsSingleSamp
 import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
 import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.queue.QScript
+import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
 
 /**
  * Implementation of ShivaVariantcalling
@@ -86,10 +87,10 @@ class ShivaVariantcalling(val root: Configurable) extends QScript
     require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", "))
 
     val cv = new CombineVariants(qscript)
-    cv.outputFile = finalFile
-    cv.setKey = "VariantCaller"
-    cv.genotypeMergeOptions = Some("PRIORITIZE")
-    cv.rodPriorityList = callers.map(_.name).mkString(",")
+    cv.out = finalFile
+    cv.setKey = Some("VariantCaller")
+    cv.genotypemergeoption = Some("PRIORITIZE")
+    cv.rod_priority_list = Some(callers.map(_.name).mkString(","))
     for (caller <- callers) {
       caller.inputBams = inputBams
       caller.namePrefix = namePrefix
@@ -110,17 +111,17 @@ class ShivaVariantcalling(val root: Configurable) extends QScript
         vtDecompose.inputVcf = vtNormalize.outputVcf
         vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz")
         add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
-        cv.addInput(vtDecompose.outputVcf, caller.name)
+        cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name)
       } else if (normalize && !decompose) {
         vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz")
         add(vtNormalize, Tabix(this, vtNormalize.outputVcf))
-        cv.addInput(vtNormalize.outputVcf, caller.name)
+        cv.variant :+= TaggedFile(vtNormalize.outputVcf, caller.name)
       } else if (!normalize && decompose) {
         vtDecompose.inputVcf = caller.outputFile
         vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz")
         add(vtDecompose, Tabix(this, vtDecompose.outputVcf))
-        cv.addInput(vtDecompose.outputVcf, caller.name)
-      } else cv.addInput(caller.outputFile, caller.name)
+        cv.variant :+= TaggedFile(vtDecompose.outputVcf, caller.name)
+      } else cv.variant :+= TaggedFile(caller.outputFile, caller.name)
     }
     add(cv)
 
@@ -139,9 +140,9 @@ class ShivaVariantcalling(val root: Configurable) extends QScript
 
     referenceVcf.foreach(referenceVcfFile => {
       val gc = new GenotypeConcordance(this)
-      gc.evalFile = vcfFile
-      gc.compFile = referenceVcfFile
-      gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt")
+      gc.eval = vcfFile
+      gc.comp = referenceVcfFile
+      gc.out = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt")
       referenceVcfRegions.foreach(gc.intervals ::= _)
       add(gc)
       addSummarizable(gc, s"$namePrefix-genotype_concordance-$name")
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala
index 98fe0e0a06342cee60db461acc33f1a64b5c23b2..91f8468b189b878d756554782b956a8a0037ceef 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala
@@ -41,7 +41,7 @@ class Delly(val root: Configurable) extends SvCaller {
         delly.analysistype = "DEL"
         delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf")
         add(delly)
-        catVariants.inputFiles :+= delly.outputvcf
+        catVariants.variant :+= delly.outputvcf
       }
       if (dup) {
         val delly = new DellyCaller(this)
@@ -49,7 +49,7 @@ class Delly(val root: Configurable) extends SvCaller {
         delly.analysistype = "DUP"
         delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf")
         add(delly)
-        catVariants.inputFiles :+= delly.outputvcf
+        catVariants.variant :+= delly.outputvcf
       }
       if (inv) {
         val delly = new DellyCaller(this)
@@ -57,18 +57,18 @@ class Delly(val root: Configurable) extends SvCaller {
         delly.analysistype = "INV"
         delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf")
         add(delly)
-        catVariants.inputFiles :+= delly.outputvcf
+        catVariants.variant :+= delly.outputvcf
       }
       if (tra) {
         val delly = new DellyCaller(this)
         delly.input = bamFile
         delly.analysistype = "TRA"
         delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf")
-        catVariants.inputFiles :+= delly.outputvcf
+        catVariants.variant :+= delly.outputvcf
         add(delly)
       }
 
-      require(catVariants.inputFiles.nonEmpty, "Must atleast 1 SV-type be selected for Delly")
+      require(catVariants.variant.nonEmpty, "Must atleast 1 SV-type be selected for Delly")
 
       add(catVariants)
       addVCF(sample, catVariants.outputFile)
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala
index cd039f946a182b70c23d715b9efc3cf57960f2b4..1224592eb7fb66eb4075eb9aff9215379d6553c4 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCaller.scala
@@ -5,7 +5,7 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
 
-import nl.lumc.sasc.biopet.extensions.gatk.broad
+import nl.lumc.sasc.biopet.extensions.gatk
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Default mode for the haplotypecaller */
@@ -14,7 +14,7 @@ class HaplotypeCaller(val root: Configurable) extends Variantcaller {
   protected def defaultPrio = 1
 
   def biopetScript() {
-    val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile)
+    val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile) // single job built from every input BAM and outputFile
     add(hc)
   }
 }
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala
index 3de2234b78317c0182aaf8db6a163c087b4afe34..09e7b5e0286fee0da538c23ce9d8b5f639df1555 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerAllele.scala
@@ -5,7 +5,7 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
 
-import nl.lumc.sasc.biopet.extensions.gatk.broad
+import nl.lumc.sasc.biopet.extensions.gatk
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Allele mode for Haplotypecaller */
@@ -14,7 +14,7 @@ class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller {
   protected def defaultPrio = 5
 
   def biopetScript() {
-    val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile)
+    val hc = gatk.HaplotypeCaller(this, inputBams.values.toList, outputFile)
     hc.alleles = config("input_alleles")
     hc.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES")
     add(hc)
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala
index eba59ccdbb73024f3435cda5fbc2e1e40293f98d..585c33d649cf304e32c19e28ee7c6c294a75eca7 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/HaplotypeCallerGvcf.scala
@@ -5,7 +5,7 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
 
-import nl.lumc.sasc.biopet.extensions.gatk.broad
+import nl.lumc.sasc.biopet.extensions.gatk
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Gvcf mode for haplotypecaller */
@@ -21,13 +21,13 @@ class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller {
   def getGvcfs = gVcfFiles
 
   def biopetScript() {
-    gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
-      val hc = broad.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz"))
+    gVcfFiles = for ((sample, inputBam) <- inputBams) yield { // store the sample -> gVCF pairs on the field that getGvcfs returns
+      val hc = gatk.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) // one gVCF job per sample
       add(hc)
       sample -> hc.out
     }
 
-    val genotypeGVCFs = broad.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile)
+    val genotypeGVCFs = gatk.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile) // pass only the gVCF files, not (sample, file) pairs
     add(genotypeGVCFs)
   }
 }
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala
index 847e671166191da3153cc2df818828c66de37aa1..ec46b9c348b3761786195777057852a96f6b214f 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala
@@ -15,11 +15,9 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
 
-import java.io.File
-
 import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
 import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
-import nl.lumc.sasc.biopet.extensions.tools.{ VcfFilter, MpileupToVcf }
+import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter }
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Makes a vcf file from a mpileup without statistics */
@@ -60,9 +58,9 @@ class RawVcf(val root: Configurable) extends Variantcaller {
     }
 
     val cv = new CombineVariants(this)
-    cv.inputFiles = rawFiles.toList
-    cv.outputFile = outputFile
-    cv.setKey = "null"
+    cv.variant = rawFiles.toList
+    cv.out = outputFile
+    cv.setKey = Some("null")
     cv.excludeNonVariants = !keepRefCalls
     add(cv)
   }
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala
index 96c3821bcb1343163507d9b0e9a950f477a47c6c..43fbe730d4b585edacff62b7a8388a5c82fbe062 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyper.scala
@@ -5,7 +5,7 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
 
-import nl.lumc.sasc.biopet.extensions.gatk.broad
+import nl.lumc.sasc.biopet.extensions.gatk
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Default mode for UnifiedGenotyper */
@@ -14,7 +14,7 @@ class UnifiedGenotyper(val root: Configurable) extends Variantcaller {
   protected def defaultPrio = 20
 
   def biopetScript() {
-    val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile)
+    val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile) // single job built from every input BAM and outputFile
     add(ug)
   }
 }
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala
index 8ffdcd962107840ccfbf175310c70035c8c668a1..364691f517c7434a39dc498a58ac1349e7e46d2f 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/UnifiedGenotyperAllele.scala
@@ -5,7 +5,7 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
 
-import nl.lumc.sasc.biopet.extensions.gatk.broad
+import nl.lumc.sasc.biopet.extensions.gatk
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Allele mode for GenotyperAllele */
@@ -14,7 +14,7 @@ class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller {
   protected def defaultPrio = 9
 
   def biopetScript() {
-    val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile)
+    val ug = gatk.UnifiedGenotyper(this, inputBams.values.toList, outputFile)
     ug.alleles = config("input_alleles")
     ug.genotyping_mode = Some("GENOTYPE_GIVEN_ALLELES")
     add(ug)
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala
index 9a0fb2839413948de68d3d16101fc4ce912df5b3..cb213f28e6485c04c7bf8f76a7293cf062516d8a 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala
@@ -35,7 +35,8 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller {
       "disable_baq" -> true,
       "depth" -> 1000000
     ),
-    "varscanmpileup2cns" -> Map("strand_filter" -> 0)
+    "varscanmpileup2cns" -> Map("strand_filter" -> 0),
+    "combinevariants" -> Map("scattercount" -> 20)
   )
 
   override def fixedValues = Map(
@@ -67,9 +68,9 @@ class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller {
     }
 
     val cv = new CombineVariants(this)
-    cv.inputFiles = sampleVcfs
-    cv.outputFile = outputFile
-    cv.setKey = "null"
+    cv.variant = sampleVcfs
+    cv.out = outputFile
+    cv.setKey = Some("null")
     cv.excludeNonVariants = true
     add(cv)
   }
diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala
index 736cff399b1dc09d41ea1df8cc6adfc697503d44..eb1d40ece24ce6862023eae3343b84c840ea3e42 100644
--- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala
+++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTest.scala
@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva
 import java.io.{ File, FileOutputStream }
 
 import com.google.common.io.Files
-import nl.lumc.sasc.biopet.extensions.gatk.broad._
+import nl.lumc.sasc.biopet.extensions.gatk.{ BaseRecalibrator, IndelRealigner, PrintReads, RealignerTargetCreator }
 import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
 import nl.lumc.sasc.biopet.extensions.tools.VcfStats
 import nl.lumc.sasc.biopet.utils.ConfigUtils
diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala
index f85137cc27a87069021752d42517e3fe6685359a..8c9dcb1e5496d3e43792bcc83d56644396eaeee6 100644
--- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala
+++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala
@@ -11,16 +11,14 @@ import com.google.common.io.Files
 import nl.lumc.sasc.biopet.core.BiopetPipe
 import nl.lumc.sasc.biopet.extensions.Freebayes
 import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge }
+import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, HaplotypeCaller, UnifiedGenotyper }
 import nl.lumc.sasc.biopet.utils.config.Config
-import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
-import nl.lumc.sasc.biopet.extensions.gatk.broad.{ HaplotypeCaller, UnifiedGenotyper }
 import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats }
 import nl.lumc.sasc.biopet.utils.ConfigUtils
-import org.apache.commons.io.FileUtils
 import org.broadinstitute.gatk.queue.QSettings
 import org.scalatest.Matchers
 import org.scalatest.testng.TestNGSuite
-import org.testng.annotations.{ AfterClass, DataProvider, Test }
+import org.testng.annotations.{ DataProvider, Test }
 
 import scala.collection.mutable.ListBuffer
 
diff --git a/toucan/pom.xml b/toucan/pom.xml
index 781e458c31cc8128843b55873781e3aaa9f8b1e0..62a9699f8f63680d1c8b6bce67a7390907ca8b34 100644
--- a/toucan/pom.xml
+++ b/toucan/pom.xml
@@ -43,5 +43,17 @@
             <artifactId>BiopetToolsExtensions</artifactId>
             <version>${project.version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.testng</groupId>
+            <artifactId>testng</artifactId>
+            <version>6.8</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.scalatest</groupId>
+            <artifactId>scalatest_2.10</artifactId>
+            <version>2.2.1</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 </project>
diff --git a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
index 2e3ffe26a1735dbea0f664bf8c4c957bd7262f7b..58dcaf82447daef751eae0d21ac7c43387026149 100644
--- a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
+++ b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
@@ -40,15 +40,17 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
   @Input(doc = "Input GVCF file", shortName = "gvcf", required = false)
   var inputGvcf: Option[File] = None
 
-  var sampleIds: List[String] = Nil
+  var outputVcf: Option[File] = None
+
+  def sampleInfo: Map[String, Map[String, Any]] = root match { // sample id -> tag map, chosen by the kind of root
+    case m: MultiSampleQScript => m.samples.map { case (sampleId, sample) => sampleId -> sample.sampleTags }
+    case null                  => VcfUtils.getSampleIds(inputVCF).map(x => x -> Map[String, Any]()).toMap // no root: ids come from the input VCF, tags stay empty
+    case s: SampleLibraryTag   => s.sampleId.map(x => x -> Map[String, Any]()).toMap
+    case _                     => throw new IllegalArgumentException("You don't have any samples") // keep the message the removed sampleIds code used
+  }
+
   def init(): Unit = {
     inputFiles :+= new InputFile(inputVCF)
-    sampleIds = root match {
-      case m: MultiSampleQScript => m.samples.keys.toList
-      case null                  => VcfUtils.getSampleIds(inputVCF)
-      case s: SampleLibraryTag   => s.sampleId.toList
-      case _                     => throw new IllegalArgumentException("You don't have any samples")
-    }
   }
 
   override def defaults = Map(
@@ -79,29 +81,29 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
     val gonlVcfFile: Option[File] = config("gonl_vcf")
     val exacVcfFile: Option[File] = config("exac_vcf")
 
-    var outputFile = normalizer.outputVcf
+    outputVcf = Some(normalizer.outputVcf)
 
     gonlVcfFile match {
       case Some(gonlFile) =>
         val vcfWithVcf = new VcfWithVcf(this)
-        vcfWithVcf.input = outputFile
+        vcfWithVcf.input = outputVcf.getOrElse(new File(""))
         vcfWithVcf.secondaryVcf = gonlFile
         vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
         vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
         add(vcfWithVcf)
-        outputFile = vcfWithVcf.output
+        outputVcf = Some(vcfWithVcf.output)
       case _ =>
     }
 
     exacVcfFile match {
       case Some(exacFile) =>
         val vcfWithVcf = new VcfWithVcf(this)
-        vcfWithVcf.input = outputFile
+        vcfWithVcf.input = outputVcf.getOrElse(new File(""))
         vcfWithVcf.secondaryVcf = exacFile
-        vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
+        vcfWithVcf.output = swapExt(outputDir, outputVcf.getOrElse(new File("")), ".vcf.gz", ".exac.vcf.gz")
         vcfWithVcf.fields ::= ("AF", "AF_exac", None)
         add(vcfWithVcf)
-        outputFile = vcfWithVcf.output
+        outputVcf = Some(vcfWithVcf.output)
       case _ =>
     }
 
@@ -116,7 +118,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
    * @param annotation: ManweDownloadAnnotateVcf object of annotated vcf
    * @return
    */
-  def importAndActivateSample(sampleID: String, inputVcf: File,
+  def importAndActivateSample(sampleID: String, sampleGroups: List[String], inputVcf: File,
                               gVCF: File, annotation: ManweAnnotateVcf): ManweActivateAfterAnnotImport = {
 
     val minGQ: Int = config("minimum_genome_quality", default = 20, namespace = "manwe")
@@ -165,6 +167,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
     imported.beds = List(bgzippedBed.output)
     imported.name = Some(sampleID)
     imported.public = isPublic
+    imported.group = sampleGroups
     imported.waitToComplete = false
     imported.isIntermediate = true
     imported.output = swapExt(outputDir, intersected.output, ".vcf.gz", ".manwe.import")
@@ -186,7 +189,6 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
   def varda(vcf: File, gVcf: File): File = {
 
     val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), namespace = "manwe")
-    //TODO: add groups!!! Need sample-specific group tags for this
 
     val annotate = new ManweAnnotateVcf(this)
     annotate.vcf = vcf
@@ -202,7 +204,14 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
     annotatedVcf.output = swapExt(outputDir, annotate.output, ".manwe.annot", "manwe.annot.vcf.gz")
     add(annotatedVcf)
 
-    val activates = sampleIds map { x => importAndActivateSample(x, vcf, gVcf, annotate) }
+    val activates = sampleInfo map { x =>
+      val sampleGroup = x._2.getOrElse("varda_group", Nil) match {
+        case x: List[String] => x
+        case Nil             => Nil
+        case _               => throw new IllegalArgumentException("Sample tag 'varda_group' is not a list of strings")
+      }
+      importAndActivateSample(x._1, sampleGroup, vcf, gVcf, annotate)
+    }
 
     val finalLn = new Ln(this)
     activates.foreach(x => finalLn.deps :+= x.output)