Commit 7657016c authored by Peter van 't Hof's avatar Peter van 't Hof

Added open reference method

parent 7c40ad23
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.qiime
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
* Created by pjvan_thof on 12/4/15.
*/
class PickOpenReferenceOtus(val root: Configurable) extends BiopetCommandLineFunction with Version {
executable = config("exe", default = "pick_open_reference_otus.py")
@Input(required = true)
var inputFasta: File = _
var outputDir: File = null
override def defaultThreads = 1
override def defaultCoreMemory = 20.0
def versionCommand = executable + " --version"
def versionRegex = """Version: (.*)""".r
@Input(required = false)
var parameterFp: Option[File] = config("parameter_fp")
@Input(required = false)
var referenceFp: Option[File] = config("reference_fp")
@Input(required = false)
var taxonomyFp: Option[File] = config("taxonomy_fp")
var force: Boolean = config("force", default = false)
var printOnly: Boolean = config("print_only", default = false)
var suppressTaxonomyAssignment: Boolean = config("suppress_taxonomy_assignment", default = false)
var percentSubsample: Option[Double] = config("percent_subsample")
var prefilterPercentId: Option[Double] = config("prefilter_percent_id")
var suppressStep4: Boolean = config("suppress_step4", default = false)
var minOtuSize: Option[Int] = config("min_otu_size")
var suppressAlignAndTree: Boolean = config("suppress_taxonomy_assignment", default = false)
def otuTable = new File(outputDir, "otu_table_mc2.biom")
def failedOtuTable = new File(outputDir, "otu_table_mc2_w_tax_no_pynast_failures.biom")
def otuMap = new File(outputDir, "final_otu_map.txt")
@Output
private var outputFiles: List[File] = Nil
override def beforeGraph(): Unit = {
super.beforeGraph()
jobOutputFile = new File(outputDir, ".std.out")
outputFiles ::= otuTable
outputFiles ::= failedOtuTable
outputFiles ::= otuMap
}
def cmdLine = executable + required("-f") +
required("-i", inputFasta) +
required("-o", outputDir) +
optional("--reference_fp", referenceFp) +
optional("--parameter_fp", parameterFp) +
optional("--taxonomy_fp", taxonomyFp) +
conditional(force, "--force") +
conditional(printOnly, "--print_only") +
conditional(suppressTaxonomyAssignment, "--suppress_taxonomy_assignment") +
(if (threads > 1) required("-a") + required("-O", threads) else "") +
optional("--percent_subsample", percentSubsample) +
optional("--prefilter_percent_id", prefilterPercentId) +
conditional(suppressStep4, "--suppress_step4") +
optional("-min_otu_size", minOtuSize) +
conditional(suppressAlignAndTree, "--suppress_align_and_tree")
}
......@@ -53,22 +53,30 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
}
def qiimeClosedDir: Option[File] = {
if (samples.values.flatMap(_.gs.qiimeClosed).nonEmpty) {
if (samples.values.flatMap(_.gearsSingle.qiimeClosed).nonEmpty) {
Some(new File(outputDir, "qiime_closed_reference"))
} else None
}
def qiimeOpenDir: Option[File] = {
if (samples.values.flatMap(_.gearsSingle.qiimeOpen).nonEmpty) {
Some(new File(outputDir, "qiime_open_reference"))
} else None
}
def qiimeClosedOtuTable: Option[File] = qiimeClosedDir.map(new File(_, "otu_table.biom"))
def qiimeClosedOtuMap: Option[File] = qiimeClosedDir.map(new File(_, "otu_map.txt"))
def qiimeOpenOtuTable: Option[File] = qiimeOpenDir.map(new File(_, "otu_table.biom"))
def qiimeOpenOtuMap: Option[File] = qiimeOpenDir.map(new File(_, "otu_map.txt"))
/**
* Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given.
*/
def addMultiSampleJobs(): Unit = {
val gss = samples.values.flatMap(_.gs.qiimeClosed).toList
val closedOtuTables = gss.map(_.otuTable)
val closedOtuMaps = gss.map(_.otuMap)
val qiimeCloseds = samples.values.flatMap(_.gearsSingle.qiimeClosed).toList
val closedOtuTables = qiimeCloseds.map(_.otuTable)
val closedOtuMaps = qiimeCloseds.map(_.otuMap)
require(closedOtuTables.size == closedOtuMaps.size)
if (closedOtuTables.nonEmpty) {
if (closedOtuTables.size > 1) {
......@@ -86,14 +94,35 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
add(Ln(qscript, closedOtuMaps.head, qiimeClosedOtuMap.get))
add(Ln(qscript, closedOtuTables.head, qiimeClosedOtuTable.get))
}
}
val qiimeOpens = samples.values.flatMap(_.gearsSingle.qiimeOpen).toList
val openOtuTables = qiimeOpens.map(_.otuTable)
val openOtuMaps = qiimeOpens.map(_.otuMap)
require(openOtuTables.size == openOtuMaps.size)
if (openOtuTables.nonEmpty) {
if (openOtuTables.size > 1) {
val mergeTables = new MergeOtuTables(qscript)
mergeTables.input = openOtuTables
mergeTables.outputFile = qiimeOpenOtuTable.get
add(mergeTables)
val mergeMaps = new MergeOtuMaps(qscript)
mergeMaps.input = openOtuMaps
mergeMaps.output = qiimeOpenOtuMap.get
add(mergeMaps)
//TODO: Plots
} else {
add(Ln(qscript, openOtuMaps.head, qiimeOpenOtuMap.get))
add(Ln(qscript, openOtuTables.head, qiimeOpenOtuTable.get))
}
}
}
/**
* Factory method for Sample class
*
* @param id SampleId
* @return Sample class
*/
......@@ -102,7 +131,8 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
class Sample(sampleId: String) extends AbstractSample(sampleId) {
/**
* Factory method for Library class
* @param id SampleId
*
* @param id SampleId
* @return Sample class
*/
def makeLibrary(id: String): Library = new Library(id)
......@@ -144,9 +174,9 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
def summaryStats = Map()
}
lazy val gs = new GearsSingle(qscript)
gs.sampleId = Some(sampleId)
gs.outputDir = sampleDir
lazy val gearsSingle = new GearsSingle(qscript)
gearsSingle.sampleId = Some(sampleId)
gearsSingle.outputDir = sampleDir
/** Function to add sample jobs */
protected def addJobs(): Unit = {
......@@ -162,9 +192,9 @@ class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qs
add(Zcat(qscript, flexipreps.flatMap(_.fastqR2Qc)) | new Gzip(qscript) > file)
}
gs.fastqR1 = Some(addDownsample(mergeR1, gs.outputDir))
gs.fastqR2 = mergeR2.map(addDownsample(_, gs.outputDir))
add(gs)
gearsSingle.fastqR1 = Some(addDownsample(mergeR1, gearsSingle.outputDir))
gearsSingle.fastqR2 = mergeR2.map(addDownsample(_, gearsSingle.outputDir))
add(gearsSingle)
}
/** Must return files to store into summary */
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.SampleLibraryTag
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.qiime._
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSample
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvan_thof on 12/4/15.
*/
class GearsQiimeOpen(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
var fastqInput: File = _
override def defaults = Map(
"splitlibrariesfastq" -> Map(
"barcode_type" -> "not-barcoded"
)
)
def init() = {
require(fastqInput != null)
require(sampleId.isDefined)
}
private var _otuMap: File = _
def otuMap = _otuMap
private var _otuTable: File = _
def otuTable = _otuTable
def biopetScript() = {
val splitLib = new SplitLibrariesFastq(this)
splitLib.input :+= fastqInput
splitLib.outputDir = new File(outputDir, "split_libraries_fastq")
sampleId.foreach(splitLib.sampleIds :+= _.replaceAll("_", "-"))
add(splitLib)
val openReference = new PickOpenReferenceOtus(this)
openReference.inputFasta = addDownsample(splitLib.outputSeqs, new File(splitLib.outputDir, s"${sampleId.get}.downsample.fna"))
openReference.outputDir = new File(outputDir, "pick_open_reference_otus")
add(openReference)
_otuMap = openReference.otuMap
_otuTable = openReference.otuTable
addSummaryJobs()
}
/** Must return a map with used settings for this pipeline */
def summarySettings: Map[String, Any] = Map()
/** File to put in the summary for thie pipeline */
def summaryFiles: Map[String, File] = Map("otu_table" -> otuTable, "otu_map" -> otuMap)
/** Name of summary output file */
def summaryFile: File = new File(outputDir, "summary.open_reference.json")
val downSample: Option[Double] = config("downsample")
def addDownsample(input: File, output: File): File = {
downSample match {
case Some(x) =>
val seqtk = new SeqtkSample(this)
seqtk.input = input
seqtk.sample = x
seqtk.output = output
add(seqtk)
output
case _ => input
}
}
}
......@@ -43,6 +43,7 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
lazy val krakenScript = if (config("gears_use_kraken", default = true)) Some(new GearsKraken(this)) else None
lazy val qiimeRatx = if (config("gears_use_qiime_rtax", default = false)) Some(new GearsQiimeRtax(this)) else None
lazy val qiimeClosed = if (config("gears_use_qiime_closed", default = false)) Some(new GearsQiimeClosed(this)) else None
lazy val qiimeOpen = if (config("gears_use_qiime_open", default = false)) Some(new GearsQiimeOpen(this)) else None
lazy val seqCount = if (config("gears_use_seq_count", default = false)) Some(new GearsSeqCount(this)) else None
/** Executed before running the script */
......@@ -138,6 +139,12 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
add(qiimeClosed)
}
qiimeOpen foreach { qiimeOpen =>
qiimeOpen.outputDir = new File(outputDir, "qiime_open")
qiimeOpen.fastqInput = combinedFastq
add(qiimeOpen)
}
seqCount.foreach { seqCount =>
seqCount.fastqInput = combinedFastq
seqCount.outputDir = new File(outputDir, "seq_count")
......@@ -155,7 +162,8 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
"skip_flexiprep" -> skipFlexiprep,
"gears_use_kraken" -> krakenScript.isDefined,
"gear_use_qiime_rtax" -> qiimeRatx.isDefined,
"gear_use_qiime_closed" -> qiimeClosed.isDefined
"gear_use_qiime_closed" -> qiimeClosed.isDefined,
"gear_use_qiime_open" -> qiimeOpen.isDefined
)
/** Statistics shown in the summary file */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment