Commit 4387eae8 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'feature-gentrap' into 'develop'

Feature gentrap

See merge request !111
parents b4acd8fd 41f9df22
......@@ -72,7 +72,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
add(baiLn)
val bamLn = Ln(this, files.head, bamFile)
bamLn.deps :+= baiLn.out
bamLn.deps :+= baiLn.output
add(bamLn)
}
List(bamFile)
......
......@@ -108,7 +108,7 @@
<dependency>
<groupId>com.github.scopt</groupId>
<artifactId>scopt_2.10</artifactId>
<version>3.2.0</version>
<version>3.3.0</version>
</dependency>
</dependencies>
</project>
......@@ -83,7 +83,13 @@ trait SummaryQScript extends BiopetQScript {
def addChecksum(file: File): Unit = {
if (writeSummary.md5sum && !SummaryQScript.md5sumCache.contains(file)) {
val md5sum = Md5sum(this, file)
val md5sum = new Md5sum(this) {
override def configName = "md5sum"
override def cmdLine: String = super.cmdLine + " || " +
required("echo") + required("error_on_capture " + input.toString) + " > " + required(output)
}
md5sum.input = file
md5sum.output = new File(file.getParentFile, file.getName + ".md5")
// Need to not write a md5 file outside the outputDir
if (!file.getAbsolutePath.startsWith(outputDir.getAbsolutePath))
......
......@@ -22,45 +22,50 @@ import nl.lumc.sasc.biopet.core.config.Configurable
/**
* Wrapper for the cufflinks command line tool.
* Written based on cufflinks version v2.2.1.
* Written based on cufflinks version v2.2.1 (md5: 07c831c4f8b4e161882731ea5694ff80)
*/
class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
/** default executable */
executable = config("exe", default = "cufflinks")
/** default threads */
override val defaultThreads = 8
/** default vmem for cluster jobs */
override val defaultVmem = "5G"
/** input file */
@Input(doc = "Input file (SAM or BAM)", required = true)
var input: File = _
var input: File = null
/** output files, computed automatically from output directory */
@Output(doc = "Output GTF file")
lazy val output_gtf: File = {
if (input == null || output_dir == null)
throw new RuntimeException("Unexpected error when trying to set cufflinks GTF output")
lazy val outputGtf: File = {
require(input != null && output_dir != null,
"Can not set Cufflinks GTF output while input file and/or output directory is not defined")
// cufflinks always outputs a transcripts.gtf file in the output directory
new File(output_dir + File.pathSeparator + "transcripts.gtf")
new File(output_dir, "transcripts.gtf")
}
@Output(doc = "Output isoform FPKM file")
lazy val output_isoforms_fpkm: File = {
if (input == null || output_dir == null)
throw new RuntimeException("Unexpected error when trying to set cufflinks isoform FPKM output")
// cufflinks always outputs a isoforms.fpkm_tracking file in the output directory
new File(output_dir + File.pathSeparator + "isoforms.fpkm_tracking")
lazy val outputIsoformsFpkm: File = {
require(input != null && output_dir != null,
"Can not set Cufflinks isoforms.fpkm_tracking output while input file and/or output directory is not defined")
new File(output_dir, "isoforms.fpkm_tracking")
}
@Output(doc = "Output GTF file")
lazy val output_genes_fpkm: File = {
if (input == null || output_dir == null)
throw new RuntimeException("Unexpected error when trying to set cufflinks genes FPKM output")
lazy val outputGenesFpkm: File = {
require(input != null && output_dir != null,
"Can not set Cufflinks genes.fpkm_tracking output while input file and/or output directory is not defined")
// cufflinks always outputs a genes.fpkm_tracking file in the output directory
new File(output_dir + File.pathSeparator + "genes.fpkm_tracking")
new File(output_dir, "genes.fpkm_tracking")
}
/** write all output files to this directory [./] */
var output_dir: String = _
var output_dir: File = config("output_dir", default = new File("."))
/** number of threads used during analysis [1] */
var num_threads: Option[Int] = config("num_threads")
......@@ -69,25 +74,25 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
var seed: Option[Int] = config("seed")
/** quantitate against reference transcript annotations */
var GTF: File = config("GTF")
var GTF: Option[File] = config("GTF")
/** use reference transcript annotation to guide assembly */
var GTF_guide: File = config("GTF_guide")
var GTF_guide: Option[File] = config("GTF_guide")
/** ignore all alignment within transcripts in this file */
var mask_file: File = config("mask_file")
var mask_file: Option[File] = config("mask_file")
/** use bias correction - reference fasta required [NULL] */
var frag_bias_correct: String = config("frag_bias_correct")
var frag_bias_correct: Option[String] = config("frag_bias_correct")
/** use 'rescue method' for multi-reads (more accurate) [FALSE] */
var multi_read_correct: Boolean = config("multi_read_correct")
var multi_read_correct: Boolean = config("multi_read_correct", default = false)
/** library prep used for input reads [below] */
var library_type: String = config("library_type")
var library_type: Option[String] = config("library_type")
/** Method used to normalize library sizes [below] */
var library_norm_method: String = config("library_norm_method")
var library_norm_method: Option[String] = config("library_norm_method")
/** average fragment length (unpaired reads only) [200] */
var frag_len_mean: Option[Int] = config("frag_len_mean")
......@@ -99,10 +104,10 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
var max_mle_iterations: Option[Int] = config("max_mle_iterations")
/** count hits compatible with reference RNAs only [FALSE] */
var compatible_hits_norm: Boolean = config("compatible_hits_norm")
var compatible_hits_norm: Boolean = config("compatible_hits_norm", default = false)
/** count all hits for normalization [TRUE] */
var total_hits_norm: Boolean = config("total_hits_norm")
var total_hits_norm: Boolean = config("total_hits_norm", default = true)
/** Number of fragment generation samples [100] */
var num_frag_count_draws: Option[Int] = config("num_frag_count_draws")
......@@ -111,16 +116,16 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
var num_frag_assign_draws: Option[Int] = config("num_frag_assign_draws")
/** Maximum number of alignments allowed per fragment [unlim] */
var max_frag_multihits: String = config("max_frag_multihits")
var max_frag_multihits: Option[Int] = config("max_frag_multihits")
/** No effective length correction [FALSE] */
var no_effective_length_correction: Boolean = config("no_effective_length_correction")
var no_effective_length_correction: Boolean = config("no_effective_length_correction", default = false)
/** No length correction [FALSE] */
var no_length_correction: Boolean = config("no_length_correction")
var no_length_correction: Boolean = config("no_length_correction", default = false)
/** assembled transcripts have this ID prefix [CUFF] */
var label: String = config("label")
var label: Option[String] = config("label")
/** suppress transcripts below this abundance level [0.10] */
var min_isoform_fraction: Option[Float] = config("min_isoform_fraction")
......@@ -165,7 +170,7 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
var overlap_radius: Option[Int] = config("overlap_radius")
/** disable tiling by faux reads [FALSE] */
var no_faux_reads: Boolean = config("no_faux_reads")
var no_faux_reads: Boolean = config("no_faux_reads", default = false)
/** overhang allowed on 3' end when merging with reference [600] */
var flag_3_overhang_tolerance: Option[Int] = config("flag_3_overhang_tolerance")
......@@ -174,39 +179,44 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
var intron_overhang_tolerance: Option[Int] = config("intron_overhang_tolerance")
/** log-friendly verbose processing (no progress bar) [FALSE] */
var verbose: Boolean = config("verbose")
var verbose: Boolean = config("verbose", default = false)
/** log-friendly quiet processing (no progress bar) [FALSE] */
var quiet: Boolean = config("quiet")
var quiet: Boolean = config("quiet", default = false)
/** do not contact server to check for update availability [FALSE] */
var no_update_check: Boolean = config("no_update_check")
var no_update_check: Boolean = config("no_update_check", default = false)
override val versionRegex = """cufflinks v(.*)""".r
override def versionCommand = executable
override val versionExitcode = List(0, 1)
def cmdLine = {
override def beforeGraph: Unit = {
threads = num_threads.getOrElse(1)
}
def cmdLine =
required(executable) +
required("--output-dir", output_dir) +
optional("--num-threads", num_threads) +
optional("--num-threads", threads) +
optional("--seed", seed) +
optional("--GTF", GTF) +
optional("--GTF-guide", GTF_guide) +
optional("--mask-file", mask_file) +
optional("--frag-bias-correct", frag_bias_correct) +
optional("--multi-read-correct", multi_read_correct) +
conditional(multi_read_correct, "--multi-read-correct") +
optional("--library-type", library_type) +
optional("--library-norm-method", library_norm_method) +
optional("--frag-len-mean", frag_len_mean) +
optional("--frag-len-std-dev", frag_len_std_dev) +
optional("--max-mle-iterations", max_mle_iterations) +
optional("--compatible-hits-norm", compatible_hits_norm) +
optional("--total-hits-norm", total_hits_norm) +
conditional(compatible_hits_norm, "--compatible-hits-norm") +
conditional(total_hits_norm, "--total-hits-norm") +
optional("--num-frag-count-draws", num_frag_count_draws) +
optional("--num-frag-assign-draws", num_frag_assign_draws) +
optional("--max-frag-multihits", max_frag_multihits) +
optional("--no-effective-length-correction", no_effective_length_correction) +
optional("--no-length-correction", no_length_correction) +
conditional(no_effective_length_correction, "--no-effective-length-correction") +
conditional(no_length_correction, "--no-length-correction") +
optional("--label", label) +
optional("--min-isoform-fraction", min_isoform_fraction) +
optional("--pre-mrna-fraction", pre_mrna_fraction) +
......@@ -222,12 +232,11 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
optional("--trim-3-dropoff-frac", trim_3_dropoff_frac) +
optional("--max-multiread-fraction", max_multiread_fraction) +
optional("--overlap-radius", overlap_radius) +
optional("--no-faux-reads", no_faux_reads) +
conditional(no_faux_reads, "--no-faux-reads") +
optional("--flag-3-overhang-tolerance", flag_3_overhang_tolerance) +
optional("--intron-overhang-tolerance", intron_overhang_tolerance) +
optional("--verbose", verbose) +
optional("--quiet", quiet) +
optional("--no-update-check", no_update_check) +
conditional(verbose, "--verbose") +
conditional(quiet, "--quiet") +
conditional(no_update_check, "--no-update-check") +
required(input)
}
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Wrapper for the cuffquant command line tool.
* Written based on cuffquant version v2.2.1 (md5: 0765b82b11db9256f5be341a7da884d6)
*/
class Cuffquant(val root: Configurable) extends BiopetCommandLineFunction {

  /** default executable */
  executable = config("exe", default = "cuffquant")

  /**
   * input files
   *
   * On the cuffquant command line, replicates of one sample are joined with commas and
   * samples are separated by spaces, e.g.
   * `sample1_rep1.sam,sample1_rep2.sam sample2_rep1.sam,sample2_rep2.sam`
   * means 2 samples, each with 2 replicates. The input is therefore a list (samples)
   * of lists (replicates) of files.
   */
  @Input(doc = "Input file (SAM or BAM)", required = true)
  var input: List[List[File]] = List.empty[List[File]]

  /** input GTF file */
  @Input(doc = "Input GTF file", required = true)
  var transcripts_gtf: File = null

  /**
   * output file, computed automatically from output directory
   *
   * Evaluated lazily, so `output_dir` may still be reassigned before first access.
   * Fails with an IllegalArgumentException (via `require`) when `output_dir` is null.
   */
  @Output(doc = "Output CXB file")
  lazy val outputCxb: File = {
    // only the output directory is needed to derive the path, so only that is checked
    require(output_dir != null,
      "Can not set Cuffquant CXB output while output directory is not defined")
    // the CXB output is expected as abundances.cxb inside the output directory
    new File(output_dir, "abundances.cxb")
  }

  /** write all output files to this directory [./] */
  var output_dir: File = config("output_dir", default = new File("."))

  /** ignore all alignment within transcripts in this file */
  var mask_file: Option[File] = config("mask_file")

  /** use bias correction - reference fasta required [NULL] */
  var frag_bias_correct: Option[String] = config("frag_bias_correct")

  /** use 'rescue method' for multi-reads (more accurate) [FALSE] */
  var multi_read_correct: Boolean = config("multi_read_correct", default = false)

  /** number of threads used during analysis [1] */
  var num_threads: Option[Int] = config("num_threads")

  /** library prep used for input reads [below] */
  var library_type: Option[String] = config("library_type")

  /** average fragment length (unpaired reads only) [200] */
  var frag_len_mean: Option[Int] = config("frag_len_mean")

  /** fragment length std deviation (unpaired reads only) [80] */
  var frag_len_std_dev: Option[Int] = config("frag_len_std_dev")

  /** minimum number of alignments in a locus for testing [10] */
  var min_alignment_count: Option[Int] = config("min_alignment_count")

  /** maximum iterations allowed for MLE calculation [5000] */
  var max_mle_iterations: Option[Int] = config("max_mle_iterations")

  /** log-friendly verbose processing (no progress bar) [FALSE] */
  var verbose: Boolean = config("verbose", default = false)

  /** log-friendly quiet processing (no progress bar) [FALSE] */
  var quiet: Boolean = config("quiet", default = false)

  /** value of random number generator seed [0] */
  var seed: Option[Int] = config("seed")

  /** do not contact server to check for update availability [FALSE] */
  var no_update_check: Boolean = config("no_update_check", default = false)

  /** maximum fragments allowed in a bundle before skipping [500000] */
  var max_bundle_frags: Option[Int] = config("max_bundle_frags")

  /** Maximum number of alignments allowed per fragment [unlim] */
  var max_frag_multihits: Option[Int] = config("max_frag_multihits")

  /** No effective length correction [FALSE] */
  var no_effective_length_correction: Boolean = config("no_effective_length_correction", default = false)

  /** No length correction [FALSE] */
  var no_length_correction: Boolean = config("no_length_correction", default = false)

  /** Skip a random subset of reads this size [0.0] */
  var read_skip_fraction: Option[Double] = config("read_skip_fraction")

  /** Break all read pairs [FALSE] */
  var no_read_pairs: Boolean = config("no_read_pairs", default = false)

  /** Trim reads to be this long (keep 5' end) [none] */
  var trim_read_length: Option[Int] = config("trim_read_length")

  /** Disable SCV correction */
  var no_scv_correction: Boolean = config("no_scv_correction", default = false)

  override val versionRegex = """cuffquant v(.*)""".r
  override def versionCommand = executable
  override val versionExitcode = List(0, 1)

  /**
   * Builds the cuffquant command line: the executable, all options, the transcripts
   * GTF and finally the alignment files, with one positional argument per sample.
   */
  def cmdLine =
    required(executable) +
      required("--output-dir", output_dir) +
      optional("--mask-file", mask_file) +
      optional("--frag-bias-correct", frag_bias_correct) +
      conditional(multi_read_correct, "--multi-read-correct") +
      optional("--num-threads", num_threads) +
      optional("--library-type", library_type) +
      optional("--frag-len-mean", frag_len_mean) +
      optional("--frag-len-std-dev", frag_len_std_dev) +
      optional("--min-alignment-count", min_alignment_count) +
      optional("--max-mle-iterations", max_mle_iterations) +
      conditional(verbose, "--verbose") +
      conditional(quiet, "--quiet") +
      optional("--seed", seed) +
      conditional(no_update_check, "--no-update-check") +
      optional("--max-bundle-frags", max_bundle_frags) +
      optional("--max-frag-multihits", max_frag_multihits) +
      conditional(no_effective_length_correction, "--no-effective-length-correction") +
      conditional(no_length_correction, "--no-length-correction") +
      optional("--read-skip-fraction", read_skip_fraction) +
      conditional(no_read_pairs, "--no-read-pairs") +
      optional("--trim-read-length", trim_read_length) +
      conditional(no_scv_correction, "--no-scv-correction") +
      required(transcripts_gtf) +
      // Replicates of one sample are comma-joined into a single argument; `repeat` then
      // emits the samples as separate arguments. The previous form called mkString(" ")
      // on each already-joined String (spacing out every character) and handed the
      // resulting List to `required` as if it were one value.
      repeat(input.map(_.mkString(",")))
}
/**
* Copyright (c) 2014 Leiden University Medical Center
*
* @author Wibowo Arindrarto
*/
package nl.lumc.sasc.biopet.extensions
import java.io.File
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
/**
* Wrapper for the gsnap command line tool
* Written based on gsnap version 2014-05-15
*/
class Gsnap(val root: Configurable) extends BiopetCommandLineFunction {
/** default executable */
executable = config("exe", default = "gsnap", freeVar = false)
/** default threads */
override val defaultThreads = 8
/** default vmem for cluster jobs */
override val defaultVmem = "6G"
/** input file */
@Input(doc = "Input FASTQ file(s)", required = true) //var input: List[File] = _
var input: List[File] = List.empty[File]
/** output file */
@Output(doc = "Output alignment file", required = true)
var output: File = null
/** genome directory */
@Argument(doc = "Directory of genome database")
var dir: Option[File] = config("dir")
/** genome database */
@Argument(doc = "Genome database name", required = true)
var db: String = config("db")
/** whether to use a suffix array, which will give increased speed */
var use_sarray: Option[Int] = config("use_sarray")
/** kmer size to use in genome database (allowed values: 16 or less) */
var kmer: Option[Int] = config("kmer")
/** sampling to use in genome database */
var sampling: Option[Int] = config("sampling")
/** process only the i-th out of every n sequences */
var part: Option[String] = config("part")
/** size of input buffer (program reads this many sequences at a time)*/
var input_buffer_size: Option[Int] = config("input_buffer_size")
/** amount of barcode to remove from start of read */
var barcode_length: Option[Int] = config("barcode_length")
/** orientation of paired-end reads */
var orientation: Option[String] = config("orientation")
/** starting position of identifier in fastq header, space-delimited (>= 1) */
var fastq_id_start: Option[Int] = config("fastq_id_start")
/** ending position of identifier in fastq header, space-delimited (>= 1) */
var fastq_id_end: Option[Int] = config("fastq_id_end")
/** when multiple fastq files are provided on the command line, gsnap assumes */
var force_single_end: Boolean = config("force_single_end", default = false)
/** skips reads marked by the illumina chastity program. expecting a string */
var filter_chastity: Option[String] = config("filter_chastity")
/** allows accession names of reads to mismatch in paired-end files */
var allow_pe_name_mismatch: Boolean = config("allow_pe_name_mismatch", default = false)
/** uncompress gzipped input files */
var gunzip: Boolean = config("gunzip", default = false)
/** uncompress bzip2-compressed input files */
var bunzip2: Boolean = config("bunzip2", default = false)
/** batch mode (default = 2) */
var batch: Option[Int] = config("batch")
/** whether to expand the genomic offsets index */
var expand_offsets: Option[Int] = config("expand_offsets")
/** maximum number of mismatches allowed (if not specified, then */
var max_mismatches: Option[Float] = config("max_mismatches")
/** whether to count unknown (n) characters in the query as a mismatch */
var query_unk_mismatch: Option[Int] = config("query_unk_mismatch")
/** whether to count unknown (n) characters in the genome as a mismatch */
var genome_unk_mismatch: Option[Int] = config("genome_unk_mismatch")
/** maximum number of alignments to find (default 1000) */
var maxsearch: Option[Int] = config("maxsearch")
/** threshold for computing a terminal alignment (from one end of the */
var terminal_threshold: Option[Int] = config("terminal_threshold")
/** threshold alignment length in bp for a terminal alignment result to be printed (in bp) */
var terminal_output_minlength: Option[Int] = config("terminal_output_minlength")
/** penalty for an indel (default 2) */
var indel_penalty: Option[Int] = config("indel_penalty")
/** minimum length at end required for indel alignments (default 4) */
var indel_endlength: Option[Int] = config("indel_endlength")
/** maximum number of middle insertions allowed (default 9) */
var max_middle_insertions: Option[Int] = config("max_middle_insertions")
/** maximum number of middle deletions allowed (default 30) */
var max_middle_deletions: Option[Int] = config("max_middle_deletions")
/** maximum number of end insertions allowed (default 3) */
var max_end_insertions: Option[Int] = config("max_end_insertions")
/** maximum number of end deletions allowed (default 6) */
var max_end_deletions: Option[Int] = config("max_end_deletions")
/** report suboptimal hits beyond best hit (default 0) */
var suboptimal_levels: Option[Int] = config("suboptimal_levels")
/** method for removing adapters from reads. currently allowed values: off, paired */
var adapter_strip: Option[String] = config("adapter_strip")
/** score to use for mismatches when trimming at ends (default is -3; */
var trim_mismatch_score: Option[Int] = config("trim_mismatch_score")
/** score to use for indels when trimming at ends (default is -4; */
var trim_indel_score: Option[Int] = config("trim_indel_score")
/** directory for snps index files (created using snpindex) (default is */
var snpsdir: Option[String] = config("snpsdir")
/** use database containing known snps (in <string>.iit, built */
var use_snps: Option[String] = config("use_snps")
/** directory for methylcytosine index files (created using cmetindex) */
var cmetdir: Option[String] = config("cmetdir")
/** directory for a-to-i rna editing index files (created using atoiindex) */
var atoidir: Option[String] = config("atoidir")
/** alignment mode: standard (default), cmet-stranded, cmet-nonstranded, */
var mode: Option[String] = config("mode")