diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/GffRead.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/GffRead.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3213b90026cbfe5289fbbaf0620e8a8a499f054a
--- /dev/null
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/GffRead.scala
@@ -0,0 +1,28 @@
+package nl.lumc.sasc.biopet.extensions
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
+
+/**
+ * Created by pjvanthof on 20/06/16.
+ */
+class GffRead(val root: Configurable) extends BiopetCommandLineFunction {
+
+  executable = config("exe", default = "gffread", freeVar = false)
+
+  @Input
+  var input: File = _
+
+  @Output
+  var output: File = _
+
+  var T: Boolean = config("T", default = false, freeVar = false)
+
+  def cmdLine = executable +
+    (if (inputAsStdin) "" else required(input)) +
+    (if (outputAsStsout) "" else required("-o", output)) +
+    conditional(T, "-T")
+}
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sed.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sed.scala
new file mode 100644
index 0000000000000000000000000000000000000000..dbc06508a55d3cf2a6cb9a9e1c8dd2286ca9f4f8
--- /dev/null
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Sed.scala
@@ -0,0 +1,36 @@
+package nl.lumc.sasc.biopet.extensions
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Version }
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
+
+import scala.util.matching.Regex
+
+/**
+ * Created by pjvanthof on 18/05/16.
+ */
+class Sed(val root: Configurable) extends BiopetCommandLineFunction with Version {
+  executable = config("exe", default = "sed", freeVar = false)
+
+  /** Command to get version of executable */
+  override def versionCommand: String = executable + " --version"
+
+  /** Regex to get version from version command output */
+  override def versionRegex: Regex = """sed \(GNU sed\) (\d+\.\d+\.\d+)""".r
+
+  @Input(required = false)
+  var inputFile: File = _
+
+  @Output
+  var outputFile: File = _
+
+  var expressions: List[String] = Nil
+
+  def cmdLine = executable +
+    repeat("-e", expressions) +
+    (if (inputAsStdin) "" else required(inputFile)) +
+    (if (outputAsStsout) "" else " > " + required(outputFile))
+
+}
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/hisat/Hisat2Build.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/hisat/Hisat2Build.scala
index 3e3c92872ce12c0fc1a16c095cb9d9117fccd869..ed22aa3b8bbbcba890d4b410b30ad278cd65b664 100644
--- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/hisat/Hisat2Build.scala
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/hisat/Hisat2Build.scala
@@ -43,8 +43,7 @@ class Hisat2Build(val root: Configurable) extends BiopetCommandLineFunction with
   var exon: Option[File] = config("exon")
 
   executable = config("exe", default = "hisat2-build", freeVar = false)
-  def versionRegex = """.*hisat2-align-s version (.*)""".r
-  override def versionExitcode = List(0, 1)
+  def versionRegex = """.*hisat.*version (.*)""".r
   def versionCommand = executable + " --version"
 
   var bmax: Option[Int] = config("bmax")
diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/NormalizeFasta.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/NormalizeFasta.scala
new file mode 100644
index 0000000000000000000000000000000000000000..a8657736f5a30c088213b302d3fe7d2f639fcc70
--- /dev/null
+++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/NormalizeFasta.scala
@@ -0,0 +1,46 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.picard
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
+
+/**
+ * Created by sajvanderzeeuw on 6-10-15.
+ */
+class NormalizeFasta(val root: Configurable) extends Picard {
+
+  javaMainClass = new picard.reference.NormalizeFasta().getClass.getName
+
+  @Input(doc = "The input fasta file", required = true)
+  var input: File = _
+
+  @Output(doc = "The output fasta file", required = true)
+  var output: File = _
+
+  val lineLength: Int = config("line_length")
+
+  val truncateSequenceNameAtWhitespace: Boolean = config("truncate_sequence_name_at_whitespace", default = false)
+
+  override def cmdLine = super.cmdLine +
+    (if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
+    else required("INPUT=", input, spaceSeparated = false)) +
+    (if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
+    else required("OUTPUT=", output, spaceSeparated = false)) +
+    required("LINE_LENGTH=", lineLength, spaceSeparated = false) +
+    conditional(truncateSequenceNameAtWhitespace, "TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE=TRUE")
+}
diff --git a/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala b/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala
index 25985115855087bd810116ea99ed1ce96cda02e2..a57e71a0503eecda1cc18c3311bfa3a0bcf786d1 100644
--- a/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala
+++ b/biopet-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutableMain.scala
@@ -14,7 +14,6 @@
  */
 package nl.lumc.sasc.biopet
 
-import nl.lumc.sasc.biopet.pipelines.generateindexes.GenerateIndexes
 import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand }
 
 object BiopetExecutableMain extends BiopetExecutable {
@@ -38,7 +37,8 @@ object BiopetExecutableMain extends BiopetExecutable {
     nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling,
     nl.lumc.sasc.biopet.pipelines.basty.Basty,
     nl.lumc.sasc.biopet.pipelines.shiva.Shiva,
-    GenerateIndexes
+    nl.lumc.sasc.biopet.pipelines.generateindexes.DownloadGenomes,
+    nl.lumc.sasc.biopet.pipelines.generateindexes.GenerateIndexes
   )
 
   def tools: List[MainCommand] = BiopetToolsExecutable.tools
diff --git a/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/DownloadNcbiAssembly.scala b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/DownloadNcbiAssembly.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3e0576309a90a6807d454e7cadb4dce208489243
--- /dev/null
+++ b/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/DownloadNcbiAssembly.scala
@@ -0,0 +1,51 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.tools
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.ToolCommandFunction
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
+
+/**
+ * Biopet extension for the DownloadNcbiAssembly tool: downloads the fasta file of an NCBI assembly
+ */
+class DownloadNcbiAssembly(val root: Configurable) extends ToolCommandFunction {
+  def toolObject = nl.lumc.sasc.biopet.tools.DownloadNcbiAssembly
+
+  @Output(doc = "Output fasta file", required = true)
+  var output: File = _
+
+  var outputReport: File = _
+
+  var assemblyId: String = null
+
+  var nameHeader: Option[String] = None
+
+  var mustHaveOne: List[String] = Nil
+  var mustNotHave: List[String] = Nil
+
+  override def defaultCoreMemory = 4.0
+
+  override def cmdLine = super.cmdLine +
+    required("-a", assemblyId) +
+    optional("--report", outputReport) +
+    required("-o", output) +
+    optional("--nameHeader", nameHeader) +
+    repeat("--mustHaveOne", mustHaveOne) +
+    repeat("--mustNotHave", mustNotHave)
+}
+
diff --git a/biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala b/biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala
index 45cb24741233150ad690787903a02f88985684f6..afc8453c4f27b2e97ce28dfa5bb643e133d7b048 100644
--- a/biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala
+++ b/biopet-tools-package/src/main/scala/nl/lumc/sasc/biopet/BiopetToolsExecutable.scala
@@ -49,5 +49,6 @@ object BiopetToolsExecutable extends BiopetExecutable {
     nl.lumc.sasc.biopet.tools.VcfToTsv,
     nl.lumc.sasc.biopet.tools.VcfWithVcf,
     nl.lumc.sasc.biopet.tools.VepNormalizer,
-    nl.lumc.sasc.biopet.tools.WipeReads)
+    nl.lumc.sasc.biopet.tools.WipeReads,
+    nl.lumc.sasc.biopet.tools.DownloadNcbiAssembly)
 }
diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/DownloadNcbiAssembly.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/DownloadNcbiAssembly.scala
new file mode 100644
index 0000000000000000000000000000000000000000..7d3f40eb10bb0ddcfd01c0678493dbd5a13cca2f
--- /dev/null
+++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/DownloadNcbiAssembly.scala
@@ -0,0 +1,97 @@
+package nl.lumc.sasc.biopet.tools
+
+import java.io.{ File, PrintWriter }
+
+import nl.lumc.sasc.biopet.utils.ToolCommand
+
+import scala.io.Source
+
+/**
+ * Created by pjvan_thof on 21-9-16.
+ */
+object DownloadNcbiAssembly extends ToolCommand {
+
+  case class Args(assemblyId: String = null,
+                  outputFile: File = null,
+                  reportFile: Option[File] = None,
+                  contigNameHeader: Option[String] = None,
+                  mustHaveOne: List[(String, String)] = List(),
+                  mustNotHave: List[(String, String)] = List()) extends AbstractArgs
+
+  class OptParser extends AbstractOptParser {
+    opt[String]('a', "assembly") required () unbounded () valueName "<id>" action { (x, c) =>
+      c.copy(assemblyId = x)
+    }
+    opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) =>
+      c.copy(outputFile = x)
+    }
+    opt[File]("report") unbounded () valueName "<file>" action { (x, c) =>
+      c.copy(reportFile = Some(x))
+    }
+    opt[String]("nameHeader") unbounded () valueName "<string>" action { (x, c) =>
+      c.copy(contigNameHeader = Some(x))
+    }
+    opt[(String, String)]("mustHaveOne") unbounded () valueName "<string>" action { (x, c) =>
+      c.copy(mustHaveOne = (x._1, x._2) :: c.mustHaveOne)
+    }
+    opt[(String, String)]("mustNotHave") unbounded () valueName "<string>" action { (x, c) =>
+      c.copy(mustNotHave = (x._1, x._2) :: c.mustNotHave)
+    }
+  }
+
+  /**
+   * @param args the command line arguments
+   */
+  def main(args: Array[String]): Unit = {
+    val argsParser = new OptParser
+    val cmdargs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
+
+    logger.info(s"Reading ${cmdargs.assemblyId} from NCBI")
+    val reader = Source.fromURL(s"ftp://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All/${cmdargs.assemblyId}.assembly.txt")
+    val assemblyReport = reader.getLines().toList
+    reader.close()
+    cmdargs.reportFile.foreach { file =>
+      val writer = new PrintWriter(file)
+      assemblyReport.foreach(writer.println)
+      writer.close()
+    }
+
+    val headers = assemblyReport.filter(_.startsWith("#")).last.stripPrefix("# ").split("\t").zipWithIndex.toMap
+    val nameId = cmdargs.contigNameHeader.map(x => headers(x))
+    val lengthId = headers.get("Sequence-Length")
+
+    val baseUrlEutils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
+
+    val fastaWriter = new PrintWriter(cmdargs.outputFile)
+
+    val allContigs = assemblyReport.filter(!_.startsWith("#"))
+      .map(_.split("\t"))
+    val totalLength = lengthId.map(id => allContigs.map(_.apply(id).toLong).sum)
+
+    logger.info(s"${allContigs.size} contigs found")
+    totalLength.foreach(l => logger.info(s"Total length: ${l}"))
+
+    val filterContigs = allContigs
+      .filter(values => cmdargs.mustNotHave.forall(x => values(headers(x._1)) != x._2))
+      .filter(values => cmdargs.mustHaveOne.exists(x => values(headers(x._1)) == x._2) || cmdargs.mustHaveOne.isEmpty)
+    val filterLength = lengthId.map(id => filterContigs.map(_.apply(id).toLong).sum)
+
+    logger.info(s"${filterContigs.size} contigs left after filtering")
+    filterLength.foreach(l => logger.info(s"Filtered length: ${l}"))
+
+    filterContigs.foreach { values =>
+      val id = if (values(headers("RefSeq-Accn")) == "na") values(headers("GenBank-Accn")) else values(headers("RefSeq-Accn"))
+      logger.info(s"Start download ${id}")
+      val fastaReader = Source.fromURL(s"${baseUrlEutils}/efetch.fcgi?db=nuccore&id=${id}&retmode=text&rettype=fasta")
+      fastaReader.getLines()
+        .map(x => nameId.map(y => x.replace(">", s">${values(y)} ")).getOrElse(x))
+        .foreach(fastaWriter.println)
+      fastaReader.close()
+    }
+
+    logger.info("Downloading complete")
+
+    fastaWriter.close()
+
+  }
+}
diff --git a/biopet-tools/src/test/resources/GCF_000844745.1.report b/biopet-tools/src/test/resources/GCF_000844745.1.report
new file mode 100644
index 0000000000000000000000000000000000000000..9f5e951151eab2ad3b68d2cb7508ca571ca35153
--- /dev/null
+++ b/biopet-tools/src/test/resources/GCF_000844745.1.report
@@ -0,0 +1,23 @@
+# Assembly name:  ViralProj14444
+# Organism name:  Ageratum yellow vein betasatellite (viruses)
+# Taxid:          185750
+# BioProject:     PRJNA14444
+# Submitter:      NCBI
+# Date:           2000-6-14
+# Assembly type:  n/a
+# Release type:   major
+# Assembly level: Complete Genome
+# Genome representation: full
+# RefSeq assembly accession: GCF_000844745.1
+#
+## Assembly-Units:
+## GenBank Unit Accession	RefSeq Unit Accession	Assembly-Unit name
+## 	GCF_000844735.1	Primary assembly
+#
+# Ordered by chromosome/plasmid; the chromosomes/plasmids are followed by
+# unlocalized scaffolds.
+# Unplaced scaffolds are listed at the end.
+# RefSeq is equal or derived from GenBank object.
+#
+# Sequence-Name	Sequence-Role	Assigned-Molecule	Assigned-Molecule-Location/Type	GenBank-Accn	Relationship	RefSeq-Accn	Assembly-Unit	Sequence-Length	UCSC-style-name
+Unknown	assembled-molecule	Unknown	Segment	na	<>	NC_003403.1	Primary assembly	1347	na
\ No newline at end of file
diff --git a/biopet-tools/src/test/resources/NC_003403.1.fasta b/biopet-tools/src/test/resources/NC_003403.1.fasta
new file mode 100644
index 0000000000000000000000000000000000000000..cf297759532bc910fd2df5d9f6e4acf2a5a1bd7e
--- /dev/null
+++ b/biopet-tools/src/test/resources/NC_003403.1.fasta
@@ -0,0 +1,22 @@
+>gi|18702122|ref|NC_003403.1| Ageratum yellow vein virus-associated DNA beta, complete genome
+ACCGGTGGCGAGCGGGGAATTTGGGTTCCAGGTGGGACCCACACCTGGAACAGCCTGGAAAGAACGTGAG
+TGGGCCAAATAAGTGGGCCGTTGAATTGGGCTTTTTATTCAAAACTATCTCAAATTTACGAAGCCCATTA
+TTATTCATTTGAGAAAAATTACATTAAACATAGTCCGTATAAATATACATTTATACGGTTACATTCTTGT
+ATACATGGTATTCGTCGTTTACTTGGATATCTATTACTGGAGCTTCCTGTTGCATCATGATATCTATTAT
+TTCAACCATGTCTTCTGATTTGAACTCTTCTATTGTTGAATCTCTATACATGATCCTTAGTATGTTCTTG
+ATACCTTCTTCTAAAGCATTAAAATTGAATGGTGGTATGATACCGTGATGTCCGTATGGAATGGAGAATT
+TGTTCTTGACTAGTGCTGGTGACCTTGTTGAAAACACTTCTACGTCAACTCTGAATTTTATTTCTGATCT
+GAACTTGACGTTGATGATGAACAATATTCCTTTATCGTTGGTATATGATATAGTCATGATTGTTTGATGG
+AAATATATTCATTTGATGTGCATTTATAATTGGATGTGTCTGTTGTATGTGGTTGTGATTAATGATCGTG
+TCAAATTTATGTTCTTGTATTAATTAGTGTATGTGTGACTGTATTGATGTTGATGTTTGTGATTATAATT
+GAGAAAAGAAGATATTCATTCATTTGAATAAGAAAAGAAAGAAAAAAAAAAGAAAACTATATACATAAAA
+AAAACGAATAATAAAAATAACTATCTATTCAGAAAAAATGGGAGCGCAGCGAAAACGAAAACAAAAATCG
+ATATCAAAGAAAATCCTGATAAATTAAATAGACAGAATTGAAAAGGAAAAAAGTAAAAAAAGATAAAAGT
+TATTACATTAAAAAAACTAAAGAAAAAAAAAATAAAAAAACTAAAAATGATAAATAAAAAAATGACGTCA
+TCTGATGTCATCACAGTTAAAAACTATTTACTGCGGTTAAAATTTAATATAATTAACCACTTGGGTAAAT
+ATATTGTCTCCAATAGGTAAATTGTCTCCAATATATCGGAGACATATTGGAGACTCTGGAGACACTTATA
+TGCTAATTTCCCATTTTACCCTTACTAATGTGACTGTAAGGCGCGTGGGAGTGCTCTGAAAAAGTAGACC
+TTCTCTCTCCAAAACTCCCCGGAGAAGACGATACAGGCTGATCCCGGCATCAATTCGCGACACGCGCGGC
+GGTGTGTACCCCTGGGAGGGTAGAAACCACTACGCTACGCAGCAGCCTTAGCTACGCCGGAGCTTAGCTC
+GCCACCGTAATAATATT
+
diff --git a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/DownloadNcbiAssemblyTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/DownloadNcbiAssemblyTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f495d7f2f4d5bf39857abea8f916953f71a87eaf
--- /dev/null
+++ b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/DownloadNcbiAssemblyTest.scala
@@ -0,0 +1,33 @@
+package nl.lumc.sasc.biopet.tools
+
+import java.io.File
+import java.nio.file.Paths
+
+import org.scalatest.Matchers
+import org.scalatest.testng.TestNGSuite
+import org.testng.annotations.Test
+
+import scala.io.Source
+
+/**
+ * Created by pjvanthof on 03/10/16.
+ */
+class DownloadNcbiAssemblyTest extends TestNGSuite with Matchers {
+  private def resourcePath(p: String): String = {
+    Paths.get(getClass.getResource(p).toURI).toString
+  }
+
+  @Test
+  def testNC_003403_1(): Unit = {
+    val output = File.createTempFile("test.", ".fasta")
+    val outputReport = File.createTempFile("test.", ".report")
+    output.deleteOnExit()
+    outputReport.deleteOnExit()
+    DownloadNcbiAssembly.main(Array("-a", "GCF_000844745.1",
+      "-o", output.getAbsolutePath,
+      "--report", outputReport.getAbsolutePath))
+
+    Source.fromFile(output).getLines().toList shouldBe Source.fromFile(new File(resourcePath("/NC_003403.1.fasta"))).getLines().toList
+    Source.fromFile(outputReport).getLines().toList shouldBe Source.fromFile(new File(resourcePath("/GCF_000844745.1.report"))).getLines().toList
+  }
+}
diff --git a/generate-indexes/pom.xml b/generate-indexes/pom.xml
index 47be110c6f01a977ba080d32ca4e468d11a3065e..6e37240f24301e63a33d46957357397e4bc36ea6 100644
--- a/generate-indexes/pom.xml
+++ b/generate-indexes/pom.xml
@@ -41,7 +41,7 @@
         </dependency>
         <dependency>
             <groupId>nl.lumc.sasc</groupId>
-            <artifactId>BiopetExtensions</artifactId>
+            <artifactId>BiopetToolsExtensions</artifactId>
             <version>${project.version}</version>
         </dependency>
         <dependency>
diff --git a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomes.scala b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomes.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5476cdba89de2b80013852c58173d0d423ea7502
--- /dev/null
+++ b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomes.scala
@@ -0,0 +1,265 @@
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.pipelines.generateindexes
+
+import java.io.File
+import java.util
+
+import nl.lumc.sasc.biopet.core.extensions.Md5sum
+import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
+import nl.lumc.sasc.biopet.extensions._
+import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
+import nl.lumc.sasc.biopet.extensions.picard.NormalizeFasta
+import nl.lumc.sasc.biopet.extensions.tools.DownloadNcbiAssembly
+import nl.lumc.sasc.biopet.utils.ConfigUtils
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.queue.QScript
+
+import scala.collection.JavaConversions._
+import scala.language.reflectiveCalls
+
+class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript {
+  def this() = this(null)
+
+  @Argument(required = true)
+  var referenceConfigFiles: List[File] = Nil
+
+  var referenceConfig: Map[String, Any] = _
+
+  override def fixedValues = Map("gffread" -> Map("T" -> true))
+
+  override def defaults = Map("normalizefasta" -> Map("line_length" -> 60))
+
+  val downloadAnnotations: Boolean = config("download_annotations", default = false)
+
+  /** This is executed before the script starts */
+  def init(): Unit = {
+    if (referenceConfig == null)
+      referenceConfig = referenceConfigFiles.foldLeft(Map[String, Any]())((a, b) => ConfigUtils.mergeMaps(a, ConfigUtils.fileToConfigMap(b)))
+  }
+
+  /** Method where jobs must be added */
+  def biopetScript(): Unit = {
+
+    for ((speciesName, c) <- referenceConfig) yield speciesName -> {
+      val speciesConfig = ConfigUtils.any2map(c)
+      val speciesDir = new File(outputDir, speciesName)
+      for ((genomeName, c) <- speciesConfig) yield genomeName -> {
+        var configDeps: List[File] = Nil
+        val genomeConfig = ConfigUtils.any2map(c)
+
+        val genomeDir = new File(speciesDir, genomeName)
+        val fastaFile = new File(genomeDir, "reference.fa")
+        val downloadFastaFile = new File(genomeDir, "download.reference.fa")
+
+        genomeConfig.get("ncbi_assembly_id") match {
+          case Some(assemblyID: String) =>
+            val downloadAssembly = new DownloadNcbiAssembly(this)
+            downloadAssembly.assemblyId = assemblyID
+            downloadAssembly.output = downloadFastaFile
+            downloadAssembly.outputReport = new File(genomeDir, s"$speciesName-$genomeName.assembly.report")
+            downloadAssembly.nameHeader = genomeConfig.get("ncbi_assembly_header_name").map(_.toString)
+            downloadAssembly.mustHaveOne = genomeConfig.get("ncbi_assembly_must_have_one")
+              .map(_.asInstanceOf[util.ArrayList[util.LinkedHashMap[String, String]]])
+              .getOrElse(new util.ArrayList()).flatMap(x => x.map(y => y._1 + "=" + y._2))
+              .toList
+            downloadAssembly.mustNotHave = genomeConfig.get("ncbi_assembly_must_not_have")
+              .map(_.asInstanceOf[util.ArrayList[util.LinkedHashMap[String, String]]])
+              .getOrElse(new util.ArrayList()).flatMap(x => x.map(y => y._1 + "=" + y._2))
+              .toList
+            downloadAssembly.isIntermediate = true
+            add(downloadAssembly)
+          case _ =>
+            val fastaUris = genomeConfig.getOrElse("fasta_uri",
+              throw new IllegalArgumentException(s"No fasta_uri found for $speciesName - $genomeName")) match {
+                case a: Traversable[_]    => a.map(_.toString).toArray
+                case a: util.ArrayList[_] => a.map(_.toString).toArray
+                case a                    => Array(a.toString)
+              }
+
+            val fastaFiles = for (fastaUri <- fastaUris) yield {
+              val curl = new Curl(this)
+              curl.url = fastaUri
+              curl.output = if (fastaUris.length > 1 || fastaUri.endsWith(".gz")) {
+                curl.isIntermediate = true
+                new File(genomeDir, new File(fastaUri).getName)
+              } else fastaFile
+
+              add(curl)
+              add(Md5sum(this, curl.output, genomeDir))
+              curl.output
+            }
+
+            val fastaCat = new FastaMerging(this)
+            fastaCat.output = downloadFastaFile
+            fastaCat.isIntermediate = true
+            if (fastaUris.length > 1 || fastaFiles.exists(_.getName.endsWith(".gz"))) {
+              fastaFiles.foreach { file =>
+                if (file.getName.endsWith(".gz")) {
+                  val zcat = new Zcat(this)
+                  zcat.appending = true
+                  zcat.input :+= file
+                  zcat.output = downloadFastaFile
+                  fastaCat.cmds :+= zcat
+                  fastaCat.input :+= file
+                } else {
+                  val cat = new Cat(this)
+                  cat.appending = true
+                  cat.input :+= file
+                  cat.output = downloadFastaFile
+                  fastaCat.cmds :+= cat
+                  fastaCat.input :+= file
+                }
+              }
+              add(fastaCat)
+              configDeps :+= fastaCat.output
+            }
+        }
+
+        val normalizeFasta = new NormalizeFasta(this)
+        normalizeFasta.input = downloadFastaFile
+        normalizeFasta.output = fastaFile
+        add(normalizeFasta)
+
+        val generateIndexes = new GenerateIndexes(this)
+        generateIndexes.fastaFile = fastaFile
+        generateIndexes.speciesName = speciesName
+        generateIndexes.genomeName = genomeName
+        generateIndexes.outputDir = genomeDir
+        //generateIndexes.fastaUris = fastaUris
+        //TODO: add gtf file
+        add(generateIndexes)
+
+        if (downloadAnnotations) {
+          val annotationDir = new File(genomeDir, "annotation")
+
+          def getAnnotation(tag: String): Map[String, Map[String, Any]] = (genomeConfig.get(tag) match {
+            case Some(s: Map[_, _]) => s.map(x => x._2 match {
+              case o: Map[_, _] => x._1.toString -> o.map(x => (x._1.toString, x._2))
+              case _            => throw new IllegalStateException(s"values in the tag $tag should be json objects")
+            })
+            case None => Map()
+            case x    => throw new IllegalStateException(s"tag $tag should be an object with objects, now $x")
+          })
+
+          // Download vep caches
+          getAnnotation("vep").foreach {
+            case (version, vep) =>
+              val vepDir = new File(annotationDir, "vep" + File.separator + version)
+              val curl = new Curl(this)
+              curl.url = vep("cache_uri").toString
+              curl.output = new File(vepDir, new File(curl.url).getName)
+              curl.isIntermediate = true
+              add(curl)
+
+              val tar = new TarExtract(this)
+              tar.inputTar = curl.output
+              tar.outputDir = vepDir
+              add(tar)
+          }
+
+          getAnnotation("dbsnp").foreach {
+            case (version, dbsnp) =>
+              val dbpsnpDir = new File(annotationDir, "dbsnp")
+              val contigMap = dbsnp.get("dbsnp_contig_map").map(_.asInstanceOf[Map[String, Any]])
+              val contigSed = contigMap.map { map =>
+                val sed = new Sed(this)
+                sed.expressions = map.map(x => s"""s/^${x._1}\t/${x._2}\t/""").toList
+                sed
+              }
+
+              val cv = new CombineVariants(this)
+              cv.reference_sequence = fastaFile
+              def addDownload(uri: String): Unit = {
+                val isZipped = uri.endsWith(".gz")
+                val output = new File(dbpsnpDir, version + "." + new File(uri).getName + (if (isZipped) "" else ".gz"))
+                val curl = new Curl(this)
+                curl.url = uri
+
+                val downloadCmd = (isZipped, contigSed) match {
+                  case (true, Some(sed))  => curl | Zcat(this) | sed | new Bgzip(this) > output
+                  case (false, Some(sed)) => curl | sed | new Bgzip(this) > output
+                  case (true, None)       => curl > output
+                  case (false, None)      => curl | new Bgzip(this) > output
+                }
+                downloadCmd.isIntermediate = true
+                add(downloadCmd)
+
+                val tabix = new Tabix(this)
+                tabix.input = output
+                tabix.p = Some("vcf")
+                tabix.isIntermediate = true
+                add(tabix)
+
+                cv.variant :+= output
+              }
+
+              dbsnp.get("vcf_uri") match {
+                case Some(l: Traversable[_])    => l.foreach(x => addDownload(x.toString))
+                case Some(l: util.ArrayList[_]) => l.foreach(x => addDownload(x.toString))
+                case Some(s)                    => addDownload(s.toString)
+                case None                       => throw new IllegalStateException("Dbsnp should always have a 'vcf_uri' key")
+              }
+
+              cv.out = new File(dbpsnpDir, s"dbsnp.$version.vcf.gz")
+              add(cv)
+          }
+
+          getAnnotation("gene_annotation").foreach {
+            case (version, geneAnnotation) =>
+              val dir = new File(annotationDir, version)
+              val gffFile: Option[File] = geneAnnotation.get("gff_uri").map { gtfUri =>
+                val outputFile = new File(dir, new File(gtfUri.toString).getName.stripSuffix(".gz"))
+                val curl = new Curl(this)
+                curl.url = gtfUri.toString
+                if (gtfUri.toString.endsWith(".gz")) add(curl | Zcat(this) > outputFile)
+                else add(curl > outputFile)
+                outputFile
+              }
+
+              val gtfFile: Option[File] = geneAnnotation.get("gtf_uri") match {
+                case Some(gtfUri) =>
+                  val outputFile = new File(dir, new File(gtfUri.toString).getName.stripSuffix(".gz"))
+                  val curl = new Curl(this)
+                  curl.url = gtfUri.toString
+                  if (gtfUri.toString.endsWith(".gz")) add(curl | Zcat(this) > outputFile)
+                  else add(curl > outputFile)
+                  Some(outputFile)
+                case _ => gffFile.map { gff =>
+                  val gffRead = new GffRead(this)
+                  gffRead.input = gff
+                  gffRead.output = swapExt(dir, gff, ".gff", ".gtf")
+                  add(gffRead)
+                  gffRead.output
+                }
+              }
+
+              val refFlatFile: Option[File] = gtfFile.map { gtf =>
+                val refFlat = new File(gtf + ".refFlat")
+                val gtfToGenePred = new GtfToGenePred(this)
+                gtfToGenePred.inputGtfs :+= gtf
+
+                add(gtfToGenePred | Awk(this, """{ print $12"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10 }""") > refFlat)
+
+                refFlat
+              }
+          }
+        }
+      }
+    }
+  }
+}
+
+object DownloadGenomes extends PipelineCommand
diff --git a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala
index 234c0961729a8ee8884b1113fe05cc7081dc987b..2b25fa7277599f41e38b27273a062df9e3e042df 100644
--- a/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala
+++ b/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexes.scala
@@ -1,301 +1,146 @@
-/**
- * Biopet is built on top of GATK Queue for building bioinformatic
- * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
- * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
- * should also be able to execute Biopet tools and pipelines.
- *
- * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
- *
- * Contact us at: sasc@lumc.nl
- *
- * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
- * license; For commercial users or users who do not want to follow the AGPL
- * license, please contact us to obtain a separate license.
- */
 package nl.lumc.sasc.biopet.pipelines.generateindexes
 
-import java.io.{ File, PrintWriter }
-import java.util
-
-import nl.lumc.sasc.biopet.core.extensions.Md5sum
 import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
-import nl.lumc.sasc.biopet.extensions._
 import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2Build, BowtieBuild }
+import nl.lumc.sasc.biopet.extensions.{ Ln, Star }
 import nl.lumc.sasc.biopet.extensions.bwa.BwaIndex
-import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
 import nl.lumc.sasc.biopet.extensions.gmap.GmapBuild
 import nl.lumc.sasc.biopet.extensions.hisat.Hisat2Build
 import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary
 import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx
-import nl.lumc.sasc.biopet.utils.ConfigUtils
 import nl.lumc.sasc.biopet.utils.config.Configurable
 import org.broadinstitute.gatk.queue.QScript
 
-import scala.collection.JavaConversions._
-import scala.language.reflectiveCalls
+import scala.collection.mutable.ListBuffer
 
+/**
+ * Created by pjvan_thof on 21-9-16.
+ */
 class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript {
   def this() = this(null)
 
+  @Input(doc = "Input fasta file", shortName = "R")
+  var fastaFile: File = _
+
+  @Argument(required = true)
+  var speciesName: String = _
+
   @Argument(required = true)
-  var referenceConfigFiles: List[File] = Nil
+  var genomeName: String = _
 
-  var referenceConfig: Map[String, Any] = null
+  @Input(required = false)
+  var gtfFile: Option[File] = None
 
-  protected var configDeps: List[File] = Nil
+  var fastaUris: Array[String] = Array()
 
-  /** This is executed before the script starts */
+  /** Init for pipeline */
   def init(): Unit = {
-    if (referenceConfig == null)
-      referenceConfig = referenceConfigFiles.foldLeft(Map[String, Any]())((a, b) => ConfigUtils.mergeMaps(a, ConfigUtils.fileToConfigMap(b)))
+    if (outputDir == null) outputDir = fastaFile.getParentFile
   }
 
-  /** Method where jobs must be added */
+  /** Pipeline itself */
   def biopetScript(): Unit = {
 
-    val outputConfig = for ((speciesName, c) <- referenceConfig) yield speciesName -> {
-      val speciesConfig = ConfigUtils.any2map(c)
-      val speciesDir = new File(outputDir, speciesName)
-      for ((genomeName, c) <- speciesConfig) yield genomeName -> {
-        var configDeps: List[File] = Nil
-        val genomeConfig = ConfigUtils.any2map(c)
-        val fastaUris = genomeConfig.getOrElse("fasta_uri",
-          throw new IllegalArgumentException(s"No fasta_uri found for $speciesName - $genomeName")) match {
-            case a: Traversable[_]    => a.map(_.toString).toArray
-            case a: util.ArrayList[_] => a.map(_.toString).toArray
-            case a                    => Array(a.toString)
-          }
-
-        val genomeDir = new File(speciesDir, genomeName)
-        val fastaFile = new File(genomeDir, "reference.fa")
-        var outputConfig: Map[String, Any] = Map("reference_fasta" -> fastaFile)
-
-        val fastaFiles = for (fastaUri <- fastaUris) yield {
-          val curl = new Curl(this)
-          curl.url = fastaUri
-          curl.output = if (fastaUris.length > 1 || fastaUri.endsWith(".gz")) {
-            curl.isIntermediate = true
-            new File(genomeDir, new File(fastaUri).getName)
-          } else fastaFile
-
-          add(curl)
-          add(Md5sum(this, curl.output, genomeDir))
-          configDeps :+= curl.output
-          curl.output
-        }
-
-        val fastaCat = new FastaMerging(this)
-        fastaCat.output = fastaFile
-
-        if (fastaUris.length > 1 || fastaFiles.exists(_.getName.endsWith(".gz"))) {
-          fastaFiles.foreach { file =>
-            if (file.getName.endsWith(".gz")) {
-              val zcat = new Zcat(this)
-              zcat.appending = true
-              zcat.input :+= file
-              zcat.output = fastaFile
-              fastaCat.cmds :+= zcat
-              fastaCat.input :+= file
-            } else {
-              val cat = new Cat(this)
-              cat.appending = true
-              cat.input :+= file
-              cat.output = fastaFile
-              fastaCat.cmds :+= cat
-              fastaCat.input :+= file
-            }
-          }
-          add(fastaCat)
-          configDeps :+= fastaCat.output
-        }
-
-        val faidx = SamtoolsFaidx(this, fastaFile)
-        add(faidx)
-        configDeps :+= faidx.output
-
-        val createDict = new CreateSequenceDictionary(this)
-        createDict.reference = fastaFile
-        createDict.output = new File(genomeDir, fastaFile.getName.stripSuffix(".fa") + ".dict")
-        createDict.species = Some(speciesName)
-        createDict.genomeAssembly = Some(genomeName)
-        createDict.uri = Some(fastaUris.mkString(","))
-        add(createDict)
-        configDeps :+= createDict.output
-
-        def createLinks(dir: File): File = {
-          val newFastaFile = new File(dir, fastaFile.getName)
-          val newFai = new File(dir, faidx.output.getName)
-          val newDict = new File(dir, createDict.output.getName)
-
-          add(Ln(this, faidx.output, newFai))
-          add(Ln(this, createDict.output, newDict))
-          val lnFasta = Ln(this, fastaFile, newFastaFile)
-          lnFasta.deps ++= List(newFai, newDict)
-          add(lnFasta)
-          newFastaFile
-        }
-
-        val annotationDir = new File(genomeDir, "annotation")
-
-        genomeConfig.get("vep_cache_uri").foreach { vepCacheUri =>
-          val vepDir = new File(annotationDir, "vep")
-          val curl = new Curl(this)
-          curl.url = vepCacheUri.toString
-          curl.output = new File(vepDir, new File(curl.url).getName)
-          curl.isIntermediate = true
-          add(curl)
-
-          val tar = new TarExtract(this)
-          tar.inputTar = curl.output
-          tar.outputDir = vepDir
-          add(tar)
-
-          val regex = """.*\/(.*)_vep_(\d*)_(.*)\.tar\.gz""".r
-          vepCacheUri.toString match {
-            case regex(species, version, assembly) if version.forall(_.isDigit) =>
-              outputConfig ++= Map("varianteffectpredictor" -> Map(
-                "species" -> species,
-                "assembly" -> assembly,
-                "cache_version" -> version.toInt,
-                "cache" -> vepDir,
-                "fasta" -> createLinks(vepDir)))
-            case _ => throw new IllegalArgumentException("Cache found but no version was found")
-          }
-        }
-
-        genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri =>
-          val cv = new CombineVariants(this)
-          cv.reference_sequence = fastaFile
-          cv.deps ::= createDict.output
-          def addDownload(uri: String): Unit = {
-            val curl = new Curl(this)
-            curl.url = uri
-            curl.output = new File(annotationDir, new File(curl.url).getName)
-            curl.isIntermediate = true
-            add(curl)
-            cv.variant :+= curl.output
-
-            if (curl.output.getName.endsWith(".vcf.gz")) {
-              val tabix = new Tabix(this)
-              tabix.input = curl.output
-              tabix.p = Some("vcf")
-              tabix.isIntermediate = true
-              add(tabix)
-              configDeps :+= tabix.outputIndex
-            }
-          }
-
-          dbsnpUri match {
-            case l: Traversable[_]    => l.foreach(x => addDownload(x.toString))
-            case l: util.ArrayList[_] => l.foreach(x => addDownload(x.toString))
-            case _                    => addDownload(dbsnpUri.toString)
-          }
-
-          cv.out = new File(annotationDir, "dbsnp.vcf.gz")
-          add(cv)
-          outputConfig += "dbsnp" -> cv.out
-        }
-
-        val gtfFile: Option[File] = genomeConfig.get("gtf_uri").map { gtfUri =>
-          val outputFile = new File(annotationDir, new File(gtfUri.toString).getName.stripSuffix(".gz"))
-          val curl = new Curl(this)
-          curl.url = gtfUri.toString
-          if (gtfUri.toString.endsWith(".gz")) add(curl | Zcat(this) > outputFile)
-          else add(curl > outputFile)
-          outputConfig += "annotation_gtf" -> outputFile
-          outputFile
-        }
-
-        val refFlatFile: Option[File] = gtfFile.map { gtf =>
-          val refFlat = new File(gtf + ".refFlat")
-          val gtfToGenePred = new GtfToGenePred(this)
-          gtfToGenePred.inputGtfs :+= gtf
-
-          add(gtfToGenePred | Awk(this, """{ print $12"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10 }""") > refFlat)
-
-          outputConfig += "annotation_refflat" -> refFlat
-          refFlat
-        }
-
-        // Bwa index
-        val bwaIndex = new BwaIndex(this)
-        bwaIndex.reference = createLinks(new File(genomeDir, "bwa"))
-        add(bwaIndex)
-        configDeps :+= bwaIndex.jobOutputFile
-        outputConfig += "bwa" -> Map("reference_fasta" -> bwaIndex.reference.getAbsolutePath)
-
-        // Gmap index
-        val gmapDir = new File(genomeDir, "gmap")
-        val gmapBuild = new GmapBuild(this)
-        gmapBuild.dir = gmapDir
-        gmapBuild.db = genomeName
-        gmapBuild.fastaFiles ::= createLinks(gmapDir)
-        add(gmapBuild)
-        configDeps :+= gmapBuild.jobOutputFile
-        outputConfig += "gsnap" -> Map("dir" -> gmapBuild.dir.getAbsolutePath, "db" -> genomeName)
-        outputConfig += "gmap" -> Map("dir" -> gmapBuild.dir.getAbsolutePath, "db" -> genomeName)
-
-        // STAR index
-        val starDir = new File(genomeDir, "star")
-        val starIndex = new Star(this)
-        starIndex.outputDir = starDir
-        starIndex.reference = createLinks(starDir)
-        starIndex.runmode = "genomeGenerate"
-        starIndex.sjdbGTFfile = gtfFile
-        add(starIndex)
-        configDeps :+= starIndex.jobOutputFile
-        outputConfig += "star" -> Map(
-          "reference_fasta" -> starIndex.reference.getAbsolutePath,
-          "genomeDir" -> starDir.getAbsolutePath
-        )
-
-        // Bowtie index
-        val bowtieIndex = new BowtieBuild(this)
-        bowtieIndex.reference = createLinks(new File(genomeDir, "bowtie"))
-        bowtieIndex.baseName = "reference"
-        add(bowtieIndex)
-        configDeps :+= bowtieIndex.jobOutputFile
-        outputConfig += "bowtie" -> Map("reference_fasta" -> bowtieIndex.reference.getAbsolutePath)
-
-        // Bowtie2 index
-        val bowtie2Index = new Bowtie2Build(this)
-        bowtie2Index.reference = createLinks(new File(genomeDir, "bowtie2"))
-        bowtie2Index.baseName = "reference"
-        add(bowtie2Index)
-        configDeps :+= bowtie2Index.jobOutputFile
-        outputConfig += "bowtie2" -> Map("reference_fasta" -> bowtie2Index.reference.getAbsolutePath)
-        outputConfig += "tophat" -> Map(
-          "bowtie_index" -> bowtie2Index.reference.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta")
-        )
-
-        // Hisat2 index
-        val hisat2Index = new Hisat2Build(this)
-        hisat2Index.inputFasta = createLinks(new File(genomeDir, "hisat2"))
-        hisat2Index.hisat2IndexBase = new File(new File(genomeDir, "hisat2"), "reference").getAbsolutePath
-        add(hisat2Index)
-        configDeps :+= hisat2Index.jobOutputFile
-        outputConfig += "hisat2" -> Map(
-          "reference_fasta" -> hisat2Index.inputFasta.getAbsolutePath,
-          "hisat_index" -> hisat2Index.inputFasta.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta")
-        )
-
-        val writeConfig = new WriteConfig
-        writeConfig.deps = configDeps
-        writeConfig.out = new File(genomeDir, s"$speciesName-$genomeName.json")
-        writeConfig.config = Map("references" -> Map(speciesName -> Map(genomeName -> outputConfig)))
-        add(writeConfig)
-
-        this.configDeps :::= configDeps
-        outputConfig
-      }
+    var outputConfig: Map[String, Any] = Map("reference_fasta" -> fastaFile)
+    val configDeps = new ListBuffer[File]()
+
+    val faidx = SamtoolsFaidx(this, fastaFile)
+    add(faidx)
+    configDeps += faidx.output
+
+    val createDict = new CreateSequenceDictionary(this)
+    createDict.reference = fastaFile
+    createDict.output = new File(outputDir, fastaFile.getName.stripSuffix(".fasta").stripSuffix(".fa") + ".dict")
+    createDict.species = Some(speciesName)
+    createDict.genomeAssembly = Some(genomeName)
+    if (fastaUris.nonEmpty) createDict.uri = Some(fastaUris.mkString(","))
+    add(createDict)
+    configDeps += createDict.output
+
+    def createLinks(dir: File): File = {
+      val newFastaFile = new File(dir, fastaFile.getName)
+      val newFai = new File(dir, faidx.output.getName)
+      val newDict = new File(dir, createDict.output.getName)
+
+      add(Ln(this, faidx.output, newFai))
+      add(Ln(this, createDict.output, newDict))
+      val lnFasta = Ln(this, fastaFile, newFastaFile)
+      lnFasta.deps ++= List(newFai, newDict)
+      add(lnFasta)
+      newFastaFile
     }
 
+    // Bwa index
+    val bwaIndex = new BwaIndex(this)
+    bwaIndex.reference = createLinks(new File(outputDir, "bwa"))
+    add(bwaIndex)
+    configDeps += bwaIndex.jobOutputFile
+    outputConfig += "bwa" -> Map("reference_fasta" -> bwaIndex.reference.getAbsolutePath)
+
+    // Gmap index
+    val gmapDir = new File(outputDir, "gmap")
+    val gmapBuild = new GmapBuild(this)
+    gmapBuild.dir = gmapDir
+    gmapBuild.db = genomeName
+    gmapBuild.fastaFiles ::= createLinks(gmapDir)
+    add(gmapBuild)
+    configDeps += gmapBuild.jobOutputFile
+    outputConfig += "gsnap" -> Map("dir" -> gmapBuild.dir.getAbsolutePath, "db" -> genomeName)
+    outputConfig += "gmap" -> Map("dir" -> gmapBuild.dir.getAbsolutePath, "db" -> genomeName)
+
+    // STAR index
+    val starDir = new File(outputDir, "star")
+    val starIndex = new Star(this)
+    starIndex.outputDir = starDir
+    starIndex.reference = createLinks(starDir)
+    starIndex.runmode = "genomeGenerate"
+    starIndex.sjdbGTFfile = gtfFile
+    add(starIndex)
+    configDeps += starIndex.jobOutputFile
+    outputConfig += "star" -> Map(
+      "reference_fasta" -> starIndex.reference.getAbsolutePath,
+      "genomeDir" -> starDir.getAbsolutePath
+    )
+
+    // Bowtie index
+    val bowtieIndex = new BowtieBuild(this)
+    bowtieIndex.reference = createLinks(new File(outputDir, "bowtie"))
+    bowtieIndex.baseName = "reference"
+    add(bowtieIndex)
+    configDeps += bowtieIndex.jobOutputFile
+    outputConfig += "bowtie" -> Map("reference_fasta" -> bowtieIndex.reference.getAbsolutePath)
+
+    // Bowtie2 index
+    val bowtie2Index = new Bowtie2Build(this)
+    bowtie2Index.reference = createLinks(new File(outputDir, "bowtie2"))
+    bowtie2Index.baseName = "reference"
+    add(bowtie2Index)
+    configDeps += bowtie2Index.jobOutputFile
+    outputConfig += "bowtie2" -> Map("reference_fasta" -> bowtie2Index.reference.getAbsolutePath)
+    outputConfig += "tophat" -> Map(
+      "bowtie_index" -> bowtie2Index.reference.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta")
+    )
+
+    // Hisat2 index
+    val hisat2Index = new Hisat2Build(this)
+    hisat2Index.inputFasta = createLinks(new File(outputDir, "hisat2"))
+    hisat2Index.hisat2IndexBase = new File(new File(outputDir, "hisat2"), "reference").getAbsolutePath
+    add(hisat2Index)
+    configDeps += hisat2Index.jobOutputFile
+    outputConfig += "hisat2" -> Map(
+      "reference_fasta" -> hisat2Index.inputFasta.getAbsolutePath,
+      "hisat_index" -> hisat2Index.inputFasta.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta")
+    )
+
     val writeConfig = new WriteConfig
-    writeConfig.deps = configDeps
-    writeConfig.out = new File(outputDir, "references.json")
-    writeConfig.config = Map("references" -> outputConfig)
+    writeConfig.deps = configDeps.toList
+    writeConfig.out = configFile
+    writeConfig.config = Map("references" -> Map(speciesName -> Map(genomeName -> outputConfig)))
     add(writeConfig)
+
   }
+
+  def configFile: File = new File(outputDir, s"$speciesName-$genomeName.json")
 }
 
 object GenerateIndexes extends PipelineCommand
diff --git a/generate-indexes/src/test/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexesTest.scala b/generate-indexes/src/test/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomesTest.scala
similarity index 50%
rename from generate-indexes/src/test/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexesTest.scala
rename to generate-indexes/src/test/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomesTest.scala
index 5931c7e0e827be453141f191d4f05005e27931c5..331798087b5c7bf26f9fc7ec87ed79403f3f272c 100644
--- a/generate-indexes/src/test/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/GenerateIndexesTest.scala
+++ b/generate-indexes/src/test/scala/nl/lumc/sasc/biopet/pipelines/generateindexes/DownloadGenomesTest.scala
@@ -25,18 +25,18 @@ import org.testng.annotations.Test
 /**
  * Created by pjvan_thof on 13-5-16.
  */
-class GenerateIndexesTest extends TestNGSuite with Matchers {
-  def initPipeline(map: Map[String, Any]): GenerateIndexes = {
-    new GenerateIndexes() {
+class DownloadGenomesTest extends TestNGSuite with Matchers {
+  def initPipeline(map: Map[String, Any]): DownloadGenomes = {
+    new DownloadGenomes() {
       override def configNamespace = "generateindexes"
-      override def globalConfig = new Config(ConfigUtils.mergeMaps(map, GenerateIndexesTest.config))
+      override def globalConfig = new Config(ConfigUtils.mergeMaps(map, DownloadGenomesTest.config))
       qSettings = new QSettings
       qSettings.runName = "test"
     }
   }
 
   @Test
-  def testNoFastaUri: Unit = {
+  def testNoFastaUri(): Unit = {
     val pipeline = initPipeline(Map())
     pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("" -> "")))
     intercept[IllegalArgumentException] {
@@ -45,57 +45,101 @@ class GenerateIndexesTest extends TestNGSuite with Matchers {
   }
 
   @Test
-  def testSingleFasta: Unit = {
+  def testNcbiAssembly(): Unit = {
+    val pipeline = initPipeline(Map())
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("ncbi_assembly_id" -> "id")))
+    pipeline.script()
+  }
+
+  @Test
+  def testSingleFasta(): Unit = {
     val pipeline = initPipeline(Map())
     pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri")))
     pipeline.script()
   }
 
   @Test
-  def testMultiFasta: Unit = {
+  def testMultiFasta(): Unit = {
     val pipeline = initPipeline(Map())
     pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> List("uri", "uri2", "uri3.gz"))))
     pipeline.script()
   }
 
   @Test
-  def testSingleDbsnp: Unit = {
-    val pipeline = initPipeline(Map())
-    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri", "dbsnp_vcf_uri" -> "uri.vcf.gz")))
+  def testSingleDbsnp(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "dbsnp" -> Map("version" -> Map("vcf_uri" -> "uri.vcf.gz")))))
     pipeline.script()
   }
 
   @Test
-  def testMultiDbsnp: Unit = {
-    val pipeline = initPipeline(Map())
-    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri", "dbsnp_vcf_uri" -> List("uri.vcf.gz", "uri2.vcf.gz"))))
+  def testContigMapDbsnp(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "dbsnp" -> Map("version" -> Map("vcf_uri" -> "uri.vcf.gz", "contig_map" -> Map("1" -> "chr1"))))))
     pipeline.script()
   }
 
   @Test
-  def testVep: Unit = {
-    val pipeline = initPipeline(Map())
-    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri", "vep_cache_uri" -> "something/human_vep_80_hg19.tar.gz")))
+  def testUnzippedContigMapDbsnp(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "dbsnp" -> Map("version" -> Map("vcf_uri" -> "uri.vcf", "contig_map" -> Map("1" -> "chr1"))))))
     pipeline.script()
   }
 
   @Test
-  def testGtfZipped: Unit = {
-    val pipeline = initPipeline(Map())
-    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri", "gtf_uri" -> "bla.gf.gz")))
+  def testSingleUnzippedDbsnp(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "dbsnp" -> Map("version" -> Map("vcf_uri" -> "uri.vcf")))))
     pipeline.script()
   }
 
   @Test
-  def testGtf: Unit = {
-    val pipeline = initPipeline(Map())
-    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri", "gtf_uri" -> "bla.gf")))
+  def testMultiDbsnp(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "dbsnp" -> Map("version" -> Map("vcf_uri" -> List("uri.vcf.gz", "uri2.vcf.gz"))))))
+    pipeline.script()
+  }
+
+  @Test
+  def testVep(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "vep" -> Map("version" -> Map("cache_uri" -> "something/human_vep_80_hg19.tar.gz")))))
+    pipeline.script()
+  }
+
+  @Test
+  def testGtfZipped(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "gene_annotation" -> Map("version" -> Map("gtf_uri" -> "bla.gf.gz")))))
+    pipeline.script()
+  }
+
+  @Test
+  def testGtf(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "gene_annotation" -> Map("version" -> Map("gtf_uri" -> "bla.gf")))))
+    pipeline.script()
+  }
+
+  @Test
+  def testGff(): Unit = {
+    val pipeline = initPipeline(Map("download_annotations" -> true))
+    pipeline.referenceConfig = Map("s1" -> Map("g1" -> Map("fasta_uri" -> "uri",
+      "gene_annotation" -> Map("version" -> Map("gff_uri" -> "bla.gf")))))
     pipeline.script()
   }
 
 }
 
-object GenerateIndexesTest {
+object DownloadGenomesTest {
   val outputDir = Files.createTempDir()
   outputDir.deleteOnExit()
 
@@ -109,6 +153,7 @@ object GenerateIndexesTest {
     "samtools" -> Map("exe" -> "test"),
     "md5sum" -> Map("exe" -> "test"),
     "gatk_jar" -> "test",
-    "tabix" -> Map("exe" -> "test")
+    "tabix" -> Map("exe" -> "test"),
+    "gffread" -> Map("exe" -> "test")
   )
 }