Skip to content
Snippets Groups Projects
Commit 838eead1 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added createDict

parent 7b8f4dd5
No related branches found
No related tags found
No related merge requests found
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
 * Command line wrapper for Picard's CreateSequenceDictionary tool.
 *
 * The generated command line consists of whatever the parent [[Picard]]
 * wrapper produces, followed by the `KEY=value` arguments assembled in
 * [[commandLine]].
 *
 * @param root configurable handed to this wrapper as its `root`
 */
class CreateSequenceDictionary(val root: Configurable) extends Picard {
  // Look up the tool's class name without constructing a Picard tool instance.
  javaMainClass = classOf[picard.sam.CreateSequenceDictionary].getName

  /** Reference file, passed to Picard as `REFERENCE=`. */
  @Input(required = true)
  var reference: File = _

  /** Destination file, passed to Picard as `OUTPUT=`. */
  @Output(required = true)
  var output: File = _

  /** Value for `GENOME_ASSEMBLY=`; read from config key "genomeAssembly", omitted when unset. */
  var genomeAssembly: Option[String] = config("genomeAssembly")

  /** Value for `URI=`; read from config key "uri", omitted when unset. */
  var uri: Option[String] = config("uri")

  /** Value for `SPECIES=`; read from config key "species", omitted when unset. */
  var species: Option[String] = config("species")

  /**
   * Value for `TRUNCATE_NAMES_AT_WHITESPACE=`; read from config key "truncateAtWhiteSpace".
   *
   * Defaults to true, which is Picard's own default for this argument, so the
   * tool behaves as before unless the option is explicitly set to false.
   * Previously a false value emitted no argument at all and Picard silently
   * fell back to its default of true, making it impossible to switch off.
   */
  var truncateAtWhiteSpace: Boolean = config("truncateAtWhiteSpace", default = true)

  /** Value for `NUM_SEQUENCES=`; read from config key "numSequences", omitted when unset. */
  var numSequences: Option[Int] = config("numSequences")

  /** Appends the CreateSequenceDictionary arguments to the parent command line. */
  override def commandLine = super.commandLine +
    required("REFERENCE=", reference, spaceSeparated = false) +
    required("OUTPUT=", output, spaceSeparated = false) +
    optional("GENOME_ASSEMBLY=", genomeAssembly, spaceSeparated = false) +
    optional("URI=", uri, spaceSeparated = false) +
    optional("SPECIES=", species, spaceSeparated = false) +
    // Always state the value explicitly so that false is honoured as well as true.
    required("TRUNCATE_NAMES_AT_WHITESPACE=", truncateAtWhiteSpace, spaceSeparated = false) +
    optional("NUM_SEQUENCES=", numSequences, spaceSeparated = false)
}
......@@ -19,6 +19,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.{PipelineCommand, BiopetQScript}
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx
import nl.lumc.sasc.biopet.extensions.{Zcat, Curl}
import nl.lumc.sasc.biopet.utils.ConfigUtils
......@@ -49,22 +50,28 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript
val fastaUrl = genomeConfig.getOrElse("fasta_url",
throw new IllegalArgumentException(s"No fasta_url found for $speciesName - $genomeName")).toString
val genomeDir = new File(speciesDir, genomeName)
val fastaFile = new File(genomeDir, "reference.fa")
val genomeDir = new File(speciesDir, genomeName)
val fastaFile = new File(genomeDir, "reference.fa")
val curl = new Curl(this)
curl.url = fastaUrl
if (fastaUrl.endsWith(".gz")) {
curl.output = new File(genomeDir, "reference.fa.gz")
curl.isIntermediate = true
add(Zcat(this, curl.output, fastaFile))
} else curl.output = fastaFile
add(curl)
val curl = new Curl(this)
curl.url = fastaUrl
if (fastaUrl.endsWith(".gz")) {
curl.output = new File(genomeDir, "reference.fa.gz")
curl.isIntermediate = true
add(Zcat(this, curl.output, fastaFile))
} else curl.output = fastaFile
add(curl)
val faidx = SamtoolsFaidx(this, fastaFile)
add(faidx)
val faidx = SamtoolsFaidx(this, fastaFile)
add(faidx)
//TODO: dict
val createDict = new CreateSequenceDictionary(this)
createDict.reference = fastaFile
createDict.output = new File(genomeDir, fastaFile.getName.stripSuffix(".fa") + ".dict")
createDict.species = Some(speciesName)
createDict.genomeAssembly = Some(genomeName)
createDict.uri = Some(fastaUrl)
add(createDict)
//TODO: other indexes
}
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment