Added createDict

838eead1 · Peter van 't Hof · 7b8f4dd5 · 838eead1 · 838eead1
Commit 838eead1 authored 9 years ago by Peter van 't Hof
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala
+/**
+ * Biopet is built on top of GATK Queue for building bioinformatic
+ * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
+ * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
+ * should also be able to execute Biopet tools and pipelines.
+ *
+ * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
+ *
+ * Contact us at: sasc@lumc.nl
+ *
+ * A dual licensing mode is applied. The source code within this project that are
+ * not part of GATK Queue is freely available for non-commercial use under an AGPL
+ * license; For commercial users or users who do not want to follow the AGPL
+ * license, please contact us to obtain a separate license.
+ */
+package nl.lumc.sasc.biopet.extensions.picard
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{Input, Output}
+
+/**
+ * Queue command-line wrapper that runs the Picard `CreateSequenceDictionary` tool.
+ *
+ * The job takes a reference file as input and declares a single output file. The
+ * optional settings are looked up through `config(...)` and can be overridden by
+ * assigning the fields before the job is added to a pipeline.
+ *
+ * @param root the parent [[Configurable]] this job is created from
+ */
+class CreateSequenceDictionary(val root: Configurable) extends Picard {
+
+  // classOf yields the class name directly; no throw-away Picard tool instance is
+  // constructed just to ask it for its class.
+  javaMainClass = classOf[picard.sam.CreateSequenceDictionary].getName
+
+  /** Reference file handed to Picard as `REFERENCE=`. */
+  @Input(required = true)
+  var reference: File = _
+
+  /** File handed to Picard as `OUTPUT=`. */
+  @Output(required = true)
+  var output: File = _
+
+  /** Value for `GENOME_ASSEMBLY=`; the argument is optional on the command line. */
+  var genomeAssembly: Option[String] = config("genomeAssembly")
+
+  /** Value for `URI=`; the argument is optional on the command line. */
+  var uri: Option[String] = config("uri")
+
+  /** Value for `SPECIES=`; the argument is optional on the command line. */
+  var species: Option[String] = config("species")
+
+  /**
+   * When true, `TRUNCATE_NAMES_AT_WHITESPACE=true` is added to the command line.
+   *
+   * NOTE(review): nothing is emitted when this is false, so Picard's own default for
+   * this option presumably stays in effect - confirm that matches what `false` is
+   * meant to express.
+   */
+  var truncateAtWhiteSpace: Boolean = config("truncateAtWhiteSpace", default = false)
+
+  /** Value for `NUM_SEQUENCES=`; the argument is optional on the command line. */
+  var numSequences: Option[Int] = config("numSequences")
+
+  /**
+   * Appends the tool specific `KEY=value` arguments to the command line built by
+   * the parent class. Every key and its value are joined without a separating space.
+   */
+  override def commandLine = super.commandLine +
+    required("REFERENCE=", reference, spaceSeparated = false) +
+    required("OUTPUT=", output, spaceSeparated = false) +
+    optional("GENOME_ASSEMBLY=", genomeAssembly, spaceSeparated = false) +
+    optional("URI=", uri, spaceSeparated = false) +
+    optional("SPECIES=", species, spaceSeparated = false) +
+    conditional(truncateAtWhiteSpace, "TRUNCATE_NAMES_AT_WHITESPACE=true") +
+    optional("NUM_SEQUENCES=", numSequences, spaceSeparated = false)
+}
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala
@@ -19,6 +19,7 @@ import java.io.File

 import nl.lumc.sasc.biopet.core.{PipelineCommand, BiopetQScript}
 import nl.lumc.sasc.biopet.core.config.Configurable
+import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary
 import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx
 import nl.lumc.sasc.biopet.extensions.{Zcat, Curl}
 import nl.lumc.sasc.biopet.utils.ConfigUtils
@@ -49,22 +50,28 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript
        val fastaUrl = genomeConfig.getOrElse("fasta_url",
          throw new IllegalArgumentException(s"No fasta_url found for $speciesName - $genomeName")).toString

-          val genomeDir = new File(speciesDir, genomeName)
-          val fastaFile = new File(genomeDir, "reference.fa")
+        val genomeDir = new File(speciesDir, genomeName)
+        val fastaFile = new File(genomeDir, "reference.fa")

-          val curl = new Curl(this)
-          curl.url = fastaUrl
-          if (fastaUrl.endsWith(".gz")) {
-            curl.output = new File(genomeDir, "reference.fa.gz")
-            curl.isIntermediate = true
-            add(Zcat(this, curl.output, fastaFile))
-          } else curl.output = fastaFile
-          add(curl)
+        val curl = new Curl(this)
+        curl.url = fastaUrl
+        if (fastaUrl.endsWith(".gz")) {
+          curl.output = new File(genomeDir, "reference.fa.gz")
+          curl.isIntermediate = true
+          add(Zcat(this, curl.output, fastaFile))
+        } else curl.output = fastaFile
+        add(curl)

-          val faidx = SamtoolsFaidx(this, fastaFile)
-          add(faidx)
+        val faidx = SamtoolsFaidx(this, fastaFile)
+        add(faidx)

-          //TODO: dict
+        val createDict = new CreateSequenceDictionary(this)
+        createDict.reference = fastaFile
+        createDict.output = new File(genomeDir, fastaFile.getName.stripSuffix(".fa") + ".dict")
+        createDict.species = Some(speciesName)
+        createDict.genomeAssembly = Some(genomeName)
+        createDict.uri = Some(fastaUrl)
+        add(createDict)

        //TODO: other indexes
      }