Commit 85116b8c authored by Peter van 't Hof

Added normalize fasta

parent c6da35a1
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.picard
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
 * Command-line wrapper around the Picard tool `picard.reference.NormalizeFasta`.
 *
 * The generated command passes `INPUT=`, `OUTPUT=` and `LINE_LENGTH=` and, when
 * enabled, `TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE=TRUE`.
 *
 * Created by sajvanderzeeuw on 6-10-15.
 *
 * @param root the [[Configurable]] handed to this extension as its root
 */
class NormalizeFasta(val root: Configurable) extends Picard {
  javaMainClass = new picard.reference.NormalizeFasta().getClass.getName

  @Input(doc = "The input fasta file", required = true)
  var input: File = _

  @Output(doc = "The output fasta file", required = true)
  var output: File = _

  /** Value passed as `LINE_LENGTH=`; read from the config key "line_length" (no default is given here). */
  val lineLength: Int = config("line_length")

  /** When true, `TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE=TRUE` is appended; read from config, `false` if unset. */
  val truncateSequenceNameAtWhitespace: Boolean = config("truncate_sequence_name_at_whitespace", default = false)

  /**
   * Extends the parent command line with the NormalizeFasta arguments.
   *
   * `/dev/stdin` and `/dev/stdout` replace the input and output files when
   * `inputAsStdin` / `outputAsStsout` are set.
   *
   * NOTE(review): `outputAsStsout` is not defined in this class; the spelling is
   * kept unchanged on the assumption that it matches the inherited member - confirm.
   */
  override def cmdLine = super.cmdLine +
    (if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
    else required("INPUT=", input, spaceSeparated = false)) +
    (if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
    else required("OUTPUT=", output, spaceSeparated = false)) +
    // LINE_LENGTH must carry the configured line length, not the output file.
    required("LINE_LENGTH=", lineLength, spaceSeparated = false) +
    conditional(truncateSequenceNameAtWhitespace, "TRUNCATE_SEQUENCE_NAMES_AT_WHITESPACE=TRUE")
}
......@@ -29,7 +29,6 @@ class DownloadNcbiAssembly(val root: Configurable) extends ToolCommandFunction {
@Output(doc = "Output fasta file", required = true)
var output: File = _
@Output(doc = "Output NCBI report", required = true)
var outputReport: File = _
var assemblyId: String = null
......
......@@ -10,8 +10,8 @@ import org.testng.annotations.Test
import scala.io.Source
/**
* Created by pjvanthof on 03/10/16.
*/
* Created by pjvanthof on 03/10/16.
*/
class DownloadNcbiAssemblyTest extends TestNGSuite with Matchers {
private def resourcePath(p: String): String = {
Paths.get(getClass.getResource(p).toURI).toString
......
......@@ -18,9 +18,10 @@ import java.io.File
import java.util
import nl.lumc.sasc.biopet.core.extensions.Md5sum
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand}
import nl.lumc.sasc.biopet.extensions._
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.picard.NormalizeFasta
import nl.lumc.sasc.biopet.extensions.tools.DownloadNcbiAssembly
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
......@@ -39,6 +40,8 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
// Sets key "T" to true under "gffread"; presumably these values cannot be overridden by user config - confirm against the parent trait.
override def fixedValues = Map("gffread" -> Map("T" -> true))
// Supplies 60 for "line_length" under "normalizefasta"; NormalizeFasta reads a "line_length" key that has no default of its own.
// NOTE(review): assumes "normalizefasta" is the config namespace used by the NormalizeFasta extension - confirm.
override def defaults = Map("normalizefasta" -> Map("line_length" -> 60))
// Read from the config key "download_annotations"; false when the key is not configured.
val downloadAnnotations: Boolean = config("download_annotations", default = false)
/** This is executed before the script starts */
......@@ -59,12 +62,13 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
val genomeDir = new File(speciesDir, genomeName)
val fastaFile = new File(genomeDir, "reference.fa")
val downloadFastaFile = new File(genomeDir, "download.reference.fa")
genomeConfig.get("ncbi_assembly_id") match {
case Some(assemblyID: String) =>
val downloadAssembly = new DownloadNcbiAssembly(this)
downloadAssembly.assemblyId = assemblyID
downloadAssembly.output = fastaFile
downloadAssembly.output = downloadFastaFile
downloadAssembly.outputReport = new File(genomeDir, s"$speciesName-$genomeName.assembly.report")
downloadAssembly.nameHeader = genomeConfig.get("ncbi_assembly_header_name").map(_.toString)
downloadAssembly.mustHaveOne = genomeConfig.get("ncbi_assembly_must_have_one")
......@@ -75,6 +79,7 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
.map(_.asInstanceOf[util.ArrayList[util.LinkedHashMap[String, String]]])
.getOrElse(new util.ArrayList()).flatMap(x => x.map(y => y._1 + "=" + y._2))
.toList
downloadAssembly.isIntermediate = true
add(downloadAssembly)
case _ =>
val fastaUris = genomeConfig.getOrElse("fasta_uri",
......@@ -98,8 +103,8 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
}
val fastaCat = new FastaMerging(this)
fastaCat.output = fastaFile
fastaCat.output = downloadFastaFile
fastaCat.isIntermediate = true
if (fastaUris.length > 1 || fastaFiles.exists(_.getName.endsWith(".gz"))) {
fastaFiles.foreach { file =>
if (file.getName.endsWith(".gz")) {
......@@ -123,6 +128,11 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
}
}
val normalizeFasta = new NormalizeFasta(this)
normalizeFasta.input = downloadFastaFile
normalizeFasta.output = fastaFile
add(normalizeFasta)
val generateIndexes = new GenerateIndexes(this)
generateIndexes.fastaFile = fastaFile
generateIndexes.speciesName = speciesName
......@@ -138,10 +148,10 @@ class DownloadGenomes(val root: Configurable) extends QScript with BiopetQScript
/**
 * Reads the section stored under `tag` in the genome config as a two-level map.
 *
 * Keys on both levels are converted with `toString`; inner values are kept as-is.
 *
 * @param tag key to look up in `genomeConfig`
 * @return the nested map found under `tag`, or an empty map when the key is absent
 * @throws IllegalStateException when the value under `tag` is not a map, or when
 *                               one of its values is not itself a map
 */
def getAnnotation(tag: String): Map[String, Map[String, Any]] = (genomeConfig.get(tag) match {
  case Some(s: Map[_, _]) => s.map(entry => entry._2 match {
    case o: Map[_, _] => entry._1.toString -> o.map(field => (field._1.toString, field._2))
    // Report the tag actually being read instead of a hard-coded "vep".
    case _ => throw new IllegalStateException(s"values in the tag $tag should be json objects")
  })
  case None => Map()
  case x => throw new IllegalStateException(s"tag $tag should be an object with objects, now $x")
})
// Download vep caches
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment