Commit 30ef4896 authored by bow's avatar bow

Add initial hisat2 wrapper

parent e5d79321
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.hisat
import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version }
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Extension for hisat2
*
* Based on version 2.0.4
*/
class Hisat2(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
// TODO: handle --sra-acc flag. This is currently unsupported by the wrapper.
@Input(doc = "Fastq file R1", shortName = "R1")
var R1: File = null
@Input(doc = "Fastq file R2", shortName = "R2", required = false)
var R2: Option[File] = None
@Output(doc = "Output file SAM", shortName = "output", required = true)
var output: File = null
executable = config("exe", default = "hisat2", freeVar = false)
def versionRegex = """.*hisat2-align-s version (.*)""".r
override def versionExitcode = List(0, 1)
def versionCommand = executable + " --version"
override def defaultCoreMemory = 4.0
override def defaultThreads = 4
/* Required */
var hisat2Index: String = config("hisat2_index")
/* Input options */
var q: Boolean = config("q", default = false)
var qseq: Boolean = config("qseq", default = false)
var f: Boolean = config("f", default = false)
var r: Boolean = config("r", default = false)
var c: Boolean = config("c", default = false)
var skip: Option[Int] = config("skip")
var upto: Option[Int] = config("upto")
var trim5: Option[Int] = config("trim5")
var trim3: Option[Int] = config("trim3")
var phred33: Boolean = config("phred33", default = false)
var phred64: Boolean = config("phred64", default = false)
var intQuals: Boolean = config("int_quals", default = false)
/* Alignment options */
var nCeil: Option[String] = config("n_ceil")
var ignoreQuals: Boolean = config("ignore_quals", default = false)
var nofw: Boolean = config("nofw", default = false)
var norc: Boolean = config("norc", default = false)
/* Spliced alignment */
var penCansplice: Option[Int] = config("pen_cansplice")
var penNoncansplice: Option[Int] = config("pen_noncansplice")
var penCanintronlen: Option[Int] = config("pen_canintronlen")
var penNoncanintronlen: Option[Int] = config("pen_noncanintronlen")
var minIntronlen: Option[Int] = config("min_intronlen")
var maxIntronlen: Option[Int] = config("max_intronlen")
var knownSplicesiteInfile: Option[File] = config("known_splicesite_infile")
var novelSplicesiteOutfile: Option[File] = config("novel_splicesite_outfile")
var novelSplicesiteInfile: Option[File] = config("novel_splicesite_infile")
var noTempSplicesite: Boolean = config("no_temp_splicesite", default = false)
var noSplicedAlignment: Boolean = config("no_spliced_alignment", default = false)
var rnaStrandness: Option[String] = config("rna_strandness")
var tmo: Boolean = config("tmo", default = false)
var dta: Boolean = config("dta", default = false)
var dtaCufflinks: Boolean = config("dta_cufflinks", default = false)
/* Scoring */
var ma: Option[Int] = config("ma")
var mp: Option[String] = config("mp") // TODO: should be (Int, Int)
var sp: Option[String] = config("sp") // TODO: should be (Int, Int)
var np: Option[Int] = config("np")
var rdg: Option[String] = config("rdg") // TODO: should be (Int, Int)
var rfg: Option[String] = config("rfg") // TODO: should be (Int, Int)
var scoreMin: Option[String] = config("score_min")
/* Reporting */
var k: Option[Int] = config("k")
var all: Option[Int] = config("all")
/* Paired-end */
var fr: Boolean = config("fr", default = false)
var rf: Boolean = config("rf", default = false)
var ff: Boolean = config("ff", default = false)
var noMixed: Boolean = config("no_mixed", default = false)
var noDiscordant: Boolean = config("no_discordant", default = false)
/* Output */
var time: Boolean = config("no_overlap", default = false)
var un: Option[String] = config("un")
var al: Option[String] = config("al")
var unConc: Option[String] = config("un_conc")
var alConc: Option[String] = config("al_conc")
var unGz: Option[String] = config("un_gz")
var alGz: Option[String] = config("al_gz")
var unConcGz: Option[String] = config("un_conc_gz")
var alConcGz: Option[String] = config("al_conc_gz")
var unBz2: Option[String] = config("un_bz2")
var alBz2: Option[String] = config("al_bz2")
var unConcBz2: Option[String] = config("un_conc_bz2")
var alConcBz2: Option[String] = config("al_conc_bz2")
var quiet: Boolean = config("quiet", default = false)
var metFile: Option[String] = config("met_file")
var metStderr: Boolean = config("met_stderr", default = false)
var met: Option[Int] = config("met")
var noHead: Boolean = config("no_head", default = false)
var noSq: Boolean = config("no_sq", default = false)
var rgId: Option[String] = config("rg_id")
var rg: List[String] = config("rg", default = Nil)
var omitSecSeq: Boolean = config("omit_sec_seq", default = false)
/* Performance */
var offrate: Option[Int] = config("offrate")
var reorder: Boolean = config("reorder", default = false)
var mm: Boolean = config("mm", default = false)
/* Other */
var qcFilter: Boolean = config("qc_filter", default = false)
var seed: Option[Int] = config("seed")
var nonDeterministic: Boolean = config("non_deterministic", default = false)
var removeChrName: Boolean = config("remove_chrname", default = false)
var addChrName: Boolean = config("add_chrname", default = false)
override def beforeGraph() {
super.beforeGraph()
val indexDir = new File(hisat2Index).getParentFile
val basename = hisat2Index.stripPrefix(indexDir.getPath + File.separator)
if (indexDir.exists()) {
if (!indexDir.list()
.toList
.filter(_.startsWith(basename))
.exists({ p =>
p.endsWith(".bt2") || p.endsWith(".bt2l")
}))
Logging.addError(s"No index files found for hisat2 in: $indexDir with basename: $basename")
}
}
/** return commandline to execute */
def cmdLine = required(executable) +
conditional(q, "-q") +
conditional(qseq, "--qseq") +
conditional(f, "-f") +
conditional(r, "-r") +
conditional(c, "-c") +
optional("--skip", skip) +
optional("--upto", upto) +
optional("--trim3", trim3) +
optional("--trim5", trim5) +
conditional(phred33, "--phred33") +
conditional(phred64, "--phred64") +
conditional(intQuals, "--int-quals") +
/* Alignment options */
optional("--n-ceil", nCeil) +
conditional(ignoreQuals, "--ignore-quals") +
conditional(nofw, "--nofw") +
conditional(norc, "--norc") +
/* Spliced alignment */
optional("--pen-cansplice", penCansplice) +
optional("--pen-noncansplice", penNoncansplice) +
optional("--pen-canintronlen", penCanintronlen) +
optional("--pen-noncanintronlen", penNoncanintronlen) +
optional("--min-intronlen", minIntronlen) +
optional("--max-intronlen", maxIntronlen) +
optional("--known-splicesite-infile", knownSplicesiteInfile) +
optional("--novel-splicesite-outfile", novelSplicesiteOutfile) +
optional("--novel-splicesite-infile", novelSplicesiteInfile) +
conditional(noTempSplicesite, "--no-temp-splicesite") +
conditional(noSplicedAlignment, "--no-spliced-alignment") +
optional("--rna-strandness", rnaStrandness) +
conditional(tmo, "--tmo") +
conditional(dta, "--dta") +
conditional(dtaCufflinks, "--dta-cufflinks") +
/* Scoring */
optional("--ma", ma) +
optional("--mp", mp) +
optional("--np", np) +
optional("--sp", sp) +
optional("--rdg", rdg) +
optional("--rfg", rfg) +
optional("--score-min", scoreMin) +
/* Reporting */
optional("-k", k) +
optional("--all", all) +
/* Paired End */
conditional(fr, "--fr") +
conditional(rf, "--rf") +
conditional(ff, "--ff") +
conditional(noMixed, "--no-mixed") +
conditional(noDiscordant, "--no-discordant") +
/* Output */
conditional(time, "--time") +
optional("--un", un) +
optional("--al", al) +
optional("--un-conc", unConc) +
optional("--al-conc", alConc) +
optional("--un-gz", unGz) +
optional("--al-gz", alGz) +
optional("--un-conc-gz", unConcGz) +
optional("--al-conc-gz", alConcGz) +
optional("--un-bz2", unBz2) +
optional("--al-bz2", alBz2) +
optional("--un-conc-bz2", unConcBz2) +
optional("--al-conc-bz2", alConcBz2) +
conditional(quiet, "--quiet") +
optional("--met-file", metFile) +
conditional(metStderr, "--met-stderr") +
optional("--met", met) +
conditional(noHead, "--no-head") +
conditional(noSq, "--no-sq") +
optional("--rg-id", rgId) +
repeat("--rg", rg) +
conditional(omitSecSeq, "--omit-sec-seq") +
/* Performance */
optional("--offrate", offrate) +
optional("--threads", threads) +
conditional(reorder, "--reorder") +
conditional(mm, "--mm") +
/* Other */
conditional(qcFilter, "--qc-filter") +
optional("--seed", seed) +
conditional(nonDeterministic, "--non-deterministic") +
conditional(removeChrName, "--remove-chrname") +
conditional(addChrName, "--add-chrname") +
/* Required */
required("-x", hisat2Index) +
(R2 match {
case Some(r2) => required("-1", R1) + optional("-2", r2)
case otherwise => required("-U", R1)
}) +
(if (outputAsStsout) "" else required("-S", output))
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment