Commit a49ae40e authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added pickOtus step

parent 1a5f1520
package nl.lumc.sasc.biopet.extensions.qiime
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
 * Command line wrapper around QIIME's `pick_otus.py`.
 *
 * Every option is read from the config under the same key as the matching
 * command line argument; the option is only emitted when it has a value
 * (for `Option` fields) or is `true` (for `Boolean` switches).
 *
 * Created by pjvan_thof on 12/4/15.
 */
class PickOtus(val root: Configurable) extends BiopetCommandLineFunction {
  executable = config("exe", default = "pick_otus.py")

  /** Input fasta file, passed as `-i`. */
  @Input(required = true)
  var inputFasta: File = _

  /** Output directory, passed as `-o`; must be set by the caller before the job is added. */
  var outputDir: File = null

  override def defaultThreads = 2
  override def defaultCoreMemory = 5.0

  // General OTU picking options
  var otu_picking_method: Option[String] = config("otu_picking_method")
  var clustering_algorithm: Option[String] = config("clustering_algorithm")
  var max_cdhit_memory: Option[Int] = config("max_cdhit_memory")
  var refseqs_fp: Option[String] = config("refseqs_fp")
  var blast_db: Option[String] = config("blast_db")
  var max_e_value_blast: Option[String] = config("max_e_value_blast")

  // SortMeRNA options
  var sortmerna_db: Option[String] = config("sortmerna_db")
  var sortmerna_e_value: Option[Double] = config("sortmerna_e_value")
  var sortmerna_coverage: Option[Double] = config("sortmerna_coverage")
  var sortmerna_tabular: Boolean = config("sortmerna_tabular", default = false)
  var sortmerna_best_N_alignments: Option[Int] = config("sortmerna_best_N_alignments")
  var sortmerna_max_pos: Option[Int] = config("sortmerna_max_pos")

  var min_aligned_percent: Option[Double] = config("min_aligned_percent")
  var similarity: Option[Double] = config("similarity")

  // NOTE(review): sumaclust_exact, trie_prefilter, reference_chimera_detection,
  // suppress_reference_chimera_detection, cluster_size_filtering and
  // suppress_cluster_size_filtering look like on/off switches in pick_otus.py,
  // yet are declared as Option[String] and emitted with a value - confirm against
  // the QIIME documentation. Types are left unchanged here to keep the interface stable.
  var sumaclust_exact: Option[String] = config("sumaclust_exact")
  var sumaclust_l: Boolean = config("sumaclust_l", default = false)
  var denovo_otu_id_prefix: Option[String] = config("denovo_otu_id_prefix")
  var swarm_resolution: Option[String] = config("swarm_resolution")

  // Prefix/suffix and trie options
  var trie_reverse_seqs: Boolean = config("trie_reverse_seqs", default = false)
  var prefix_prefilter_length: Option[String] = config("prefix_prefilter_length")
  var trie_prefilter: Option[String] = config("trie_prefilter")
  var prefix_length: Option[String] = config("prefix_length")
  var suffix_length: Option[String] = config("suffix_length")

  // uclust options
  var enable_rev_strand_match: Boolean = config("enable_rev_strand_match", default = false)
  var suppress_presort_by_abundance_uclust: Boolean = config("suppress_presort_by_abundance_uclust", default = false)
  var optimal_uclust: Boolean = config("optimal_uclust", default = false)
  var exact_uclust: Boolean = config("exact_uclust", default = false)
  var user_sort: Boolean = config("user_sort", default = false)
  var suppress_new_clusters: Boolean = config("suppress_new_clusters", default = false)
  var max_accepts: Option[String] = config("max_accepts")
  var max_rejects: Option[String] = config("max_rejects")
  var stepwords: Option[String] = config("stepwords")
  var word_length: Option[String] = config("word_length")
  var suppress_uclust_stable_sort: Boolean = config("suppress_uclust_stable_sort", default = false)
  var suppress_prefilter_exact_match: Boolean = config("suppress_prefilter_exact_match", default = false)
  var save_uc_files: Boolean = config("save_uc_files", default = false)

  // usearch options
  var percent_id_err: Option[String] = config("percent_id_err")
  var minsize: Option[String] = config("minsize")
  var abundance_skew: Option[String] = config("abundance_skew")
  var db_filepath: Option[String] = config("db_filepath")
  var perc_id_blast: Option[String] = config("perc_id_blast")
  var de_novo_chimera_detection: Boolean = config("de_novo_chimera_detection", default = false)
  var suppress_de_novo_chimera_detection: Boolean = config("suppress_de_novo_chimera_detection", default = false)
  var reference_chimera_detection: Option[String] = config("reference_chimera_detection")
  var suppress_reference_chimera_detection: Option[String] = config("suppress_reference_chimera_detection")
  var cluster_size_filtering: Option[String] = config("cluster_size_filtering")
  var suppress_cluster_size_filtering: Option[String] = config("suppress_cluster_size_filtering")
  var remove_usearch_logs: Boolean = config("remove_usearch_logs", default = false)
  var derep_fullseq: Boolean = config("derep_fullseq", default = false)
  var non_chimeras_retention: Option[String] = config("non_chimeras_retention")
  var minlen: Option[String] = config("minlen")
  var usearch_fast_cluster: Boolean = config("usearch_fast_cluster", default = false)
  var usearch61_sort_method: Option[String] = config("usearch61_sort_method")
  var sizeorder: Boolean = config("sizeorder", default = false)

  /**
   * Builds the `pick_otus.py` command line.
   *
   * Boolean fields are rendered with `conditional` (bare switch, only when true);
   * all other fields are rendered with `required`/`optional` (flag followed by a value).
   */
  def cmdLine = executable +
    required("-i", inputFasta) +
    required("-o", outputDir) +
    optional("-m", otu_picking_method) +
    optional("-c", clustering_algorithm) +
    optional("-M", max_cdhit_memory) +
    optional("-r", refseqs_fp) +
    optional("-b", blast_db) +
    optional("-e", max_e_value_blast) +
    optional("--sortmerna_db", sortmerna_db) +
    optional("--sortmerna_e_value", sortmerna_e_value) +
    optional("--sortmerna_coverage", sortmerna_coverage) +
    conditional(sortmerna_tabular, "--sortmerna_tabular") +
    optional("--sortmerna_best_N_alignments", sortmerna_best_N_alignments) +
    optional("--sortmerna_max_pos", sortmerna_max_pos) +
    optional("--min_aligned_percent", min_aligned_percent) +
    optional("--similarity", similarity) +
    optional("--sumaclust_exact", sumaclust_exact) +
    conditional(sumaclust_l, "--sumaclust_l") +
    optional("--denovo_otu_id_prefix", denovo_otu_id_prefix) +
    optional("--swarm_resolution", swarm_resolution) +
    conditional(trie_reverse_seqs, "--trie_reverse_seqs") +
    optional("--prefix_prefilter_length", prefix_prefilter_length) +
    optional("--trie_prefilter", trie_prefilter) +
    optional("--prefix_length", prefix_length) +
    optional("--suffix_length", suffix_length) +
    conditional(enable_rev_strand_match, "--enable_rev_strand_match") +
    conditional(suppress_presort_by_abundance_uclust, "--suppress_presort_by_abundance_uclust") +
    conditional(optimal_uclust, "--optimal_uclust") +
    conditional(exact_uclust, "--exact_uclust") +
    conditional(user_sort, "--user_sort") +
    conditional(suppress_new_clusters, "--suppress_new_clusters") +
    optional("--max_accepts", max_accepts) +
    optional("--max_rejects", max_rejects) +
    optional("--stepwords", stepwords) +
    optional("--word_length", word_length) +
    conditional(suppress_uclust_stable_sort, "--suppress_uclust_stable_sort") +
    conditional(suppress_prefilter_exact_match, "--suppress_prefilter_exact_match") +
    conditional(save_uc_files, "--save_uc_files") +
    optional("--percent_id_err", percent_id_err) +
    optional("--minsize", minsize) +
    optional("--abundance_skew", abundance_skew) +
    optional("--db_filepath", db_filepath) +
    optional("--perc_id_blast", perc_id_blast) +
    conditional(de_novo_chimera_detection, "--de_novo_chimera_detection") +
    conditional(suppress_de_novo_chimera_detection, "--suppress_de_novo_chimera_detection") +
    optional("--reference_chimera_detection", reference_chimera_detection) +
    optional("--suppress_reference_chimera_detection", suppress_reference_chimera_detection) +
    optional("--cluster_size_filtering", cluster_size_filtering) +
    optional("--suppress_cluster_size_filtering", suppress_cluster_size_filtering) +
    conditional(remove_usearch_logs, "--remove_usearch_logs") +
    conditional(derep_fullseq, "--derep_fullseq") +
    optional("--non_chimeras_retention", non_chimeras_retention) +
    optional("--minlen", minlen) +
    // Boolean switch: must be a bare flag, not a flag followed by "true"/"false".
    conditional(usearch_fast_cluster, "--usearch_fast_cluster") +
    optional("--usearch61_sort_method", usearch61_sort_method) +
    conditional(sizeorder, "--sizeorder") +
    optional("--threads", threads)
}
......@@ -7,8 +7,8 @@ import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvanthof on 04/12/15.
*/
* Created by pjvanthof on 04/12/15.
*/
class ExtractUnmappedReads(val root: Configurable) extends QScript with BiopetQScript {
var bamFile: File = _
......
package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.{PipelineCommand, MultiSampleQScript}
import nl.lumc.sasc.biopet.core.{ PipelineCommand, MultiSampleQScript }
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvanthof on 03/12/15.
*/
* Created by pjvanthof on 03/12/15.
*/
class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qscript =>
def this() = this(null)
......@@ -25,16 +25,16 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
}
/**
* Method where the multisample jobs should be added; this is executed only when the -sample argument is not given.
*/
* Method where the multisample jobs should be added; this is executed only when the -sample argument is not given.
*/
def addMultiSampleJobs(): Unit = {
}
/**
* Factory method for Sample class
* @param id SampleId
* @return Sample class
*/
* Factory method for Sample class
* @param id SampleId
* @return Sample class
*/
def makeSample(id: String): Sample = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
......@@ -44,10 +44,10 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
}
/**
* Factory method for Library class
* @param id LibraryId
* @return Library class
*/
* Factory method for Library class
* @param id LibraryId
* @return Library class
*/
def makeLibrary(id: String): Library = new Library(id)
class Library(libId: String) extends AbstractLibrary(libId) {
......
......@@ -2,14 +2,14 @@ package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.kraken.{KrakenReport, Kraken}
import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken }
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvanthof on 04/12/15.
*/
* Created by pjvanthof on 04/12/15.
*/
class GearsKraken(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
var fastqR1: File = _
......@@ -74,6 +74,6 @@ class GearsKraken(val root: Configurable) extends QScript with SummaryQScript wi
/**
 * Files shown in the summary file: the output files plus the input R1 file
 * and, when present, the input R2 file.
 *
 * R2 is stored under its own key ("input_R2") so it does not overwrite the
 * "input_R1" entry.
 */
def summaryFiles: Map[String, File] = outputFiles + ("input_R1" -> fastqR1) ++ (fastqR2 match {
  case Some(file) => Map("input_R2" -> file)
  case _ => Map()
})
}
package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.extensions.qiime.PickOtus
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvan_thof on 12/4/15.
*/
* Created by pjvan_thof on 12/4/15.
*/
class GearsQiimeRatx(val root: Configurable) extends QScript with BiopetQScript {
var fastaR1: File = _
......@@ -18,6 +19,9 @@ class GearsQiimeRatx(val root: Configurable) extends QScript with BiopetQScript
}
/**
 * Adds the pipeline jobs: a single [[PickOtus]] job that reads `fastaR1`
 * and writes into the "pick_otus" subdirectory of `outputDir`.
 */
def biopetScript() = {
  val otuPicker = new PickOtus(this)
  otuPicker.outputDir = new File(outputDir, "pick_otus")
  otuPicker.inputFasta = fastaR1
  add(otuPicker)
}
}
......@@ -79,8 +79,8 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
/** Method to add jobs */
def biopetScript(): Unit = {
val (r1: File, r2: Option[File]) = (fastqR1, fastqR2, bamFile) match {
case (Some(r1), r2, _) => (r1, r2)
val (r1, r2): (File, Option[File]) = (fastqR1, fastqR2, bamFile) match {
case (Some(r1), _, _) => (r1, fastqR2)
case (_, _, Some(bam)) =>
val extract = new ExtractUnmappedReads(this)
extract.outputDir = outputDir
......@@ -90,11 +90,11 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
extract.biopetScript()
addAll(extract.functions)
(extract.fastqUnmappedR1, Some(extract.fastqUnmappedR2))
case _ => Logging.addError("Missing input files")
case _ => throw new IllegalArgumentException("Missing input files")
}
lazy val fastqR1 = fastqToFasta(r1, outputName + ".R1")
lazy val fastqR2 = r2.map(fastqToFasta(_, outputName + ".R2"))
lazy val fastaR1 = fastqToFasta(r1, outputName + ".R1")
lazy val fastaR2 = r2.map(fastqToFasta(_, outputName + ".R2"))
if (gearsUseKraken) {
val kraken = new GearsKraken(this)
......@@ -110,7 +110,12 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
if (gearsUserQiimeRtax) {
val qiimeRatx = new GearsQiimeRatx(this)
qiimeRatx.outputDir = new File(outputDir, "qiime_ratx")
qiimeRatx.fastaR1 = fastaR1
qiimeRatx.fastqR2 = fastaR2
qiimeRatx.init()
qiimeRatx.biopetScript()
addAll(qiimeRatx.functions)
}
addSummaryJobs()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment