Commit 857a2b9c authored by Peter van 't Hof
Browse files

Added SplitLibrariesFastq

parent 5ad3e1c7
package nl.lumc.sasc.biopet.extensions.qiime
import java.io.File
import nl.lumc.sasc.biopet.core.{Version, BiopetCommandLineFunction}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
 * Created by pjvan_thof on 12/10/15.
 *
 * Command line wrapper around QIIME's `split_libraries_fastq.py`.
 *
 * Every option is read from the Biopet config under the same name as the field.
 * The field names mirror the flags of the script; see the QIIME documentation
 * of `split_libraries_fastq.py` for the meaning of each individual flag.
 *
 * @param root Parent configurable this job takes its configuration from
 */
class SplitLibrariesFastq(val root: Configurable) extends BiopetCommandLineFunction with Version {
  executable = config("exe", default = "split_libraries_fastq.py")

  /** Command used to retrieve the version of the executable */
  def versionCommand = executable + " --version"

  /** Regex used to extract the version from the output of [[versionCommand]] */
  def versionRegex = """Version: (.*)""".r

  /** Input files, passed to `-i` as one comma separated value */
  @Input
  var input: List[File] = Nil

  /** Output directory, passed to `-o`; must be set before [[beforeGraph]] runs */
  var outputDir: File = _

  // Options below are only added to the command line when a value is set / true
  var v: Option[String] = config("v")
  var m: Option[String] = config("m")
  var b: Option[String] = config("b")
  var store_qual_scores: Boolean = config("store_qual_scores", default = false)

  /** Sample ids, passed to `--sample_ids` as one comma separated value */
  var sample_ids: List[String] = Nil
  var store_demultiplexed_fastq: Boolean = config("store_demultiplexed_fastq", default = false)
  var retain_unassigned_reads: Boolean = config("retain_unassigned_reads", default = false)
  var r: Option[Int] = config("r")
  var p: Option[Double] = config("p")
  var n: Option[Int] = config("n")
  var s: Option[Int] = config("s")
  var rev_comp_barcode: Boolean = config("rev_comp_barcode", default = false)
  var rev_comp_mapping_barcodes: Boolean = config("rev_comp_mapping_barcodes", default = false)
  var rev_comp: Boolean = config("rev_comp", default = false)
  var q: Option[Int] = config("q")
  var last_bad_quality_char: Option[String] = config("last_bad_quality_char")
  var barcode_type: Option[String] = config("barcode_type")
  var max_barcode_errors: Option[Double] = config("max_barcode_errors")
  var phred_offset: Option[String] = config("phred_offset")

  /** Location of `seqs.fna` inside [[outputDir]]; registered as output of this job */
  def outputSeqs = new File(outputDir, "seqs.fna")

  /**
   * Validates the job and registers [[outputSeqs]] as an output file.
   *
   * @throws IllegalArgumentException when no input is given or no output directory is set
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    require(input.nonEmpty, "SplitLibrariesFastq requires at least one input file")
    require(outputDir != null, "SplitLibrariesFastq requires an output directory")
    outputFiles :+= outputSeqs
  }

  /** Returns the command to execute */
  def cmdLine = executable +
    optional("-v", v) +
    optional("-m", m) +
    optional("-b", b) +
    conditional(store_qual_scores, "--store_qual_scores") +
    (sample_ids match {
      case l: List[_] if l.nonEmpty => optional("--sample_ids", l.mkString(","))
      case _                        => ""
    }) +
    conditional(store_demultiplexed_fastq, "--store_demultiplexed_fastq") +
    conditional(retain_unassigned_reads, "--retain_unassigned_reads") +
    optional("-r", r) +
    optional("-p", p) +
    optional("-n", n) +
    optional("-s", s) +
    conditional(rev_comp_barcode, "--rev_comp_barcode") +
    conditional(rev_comp_mapping_barcodes, "--rev_comp_mapping_barcodes") +
    conditional(rev_comp, "--rev_comp") +
    optional("-q", q) +
    optional("--last_bad_quality_char", last_bad_quality_char) +
    optional("--barcode_type", barcode_type) +
    optional("--max_barcode_errors", max_barcode_errors) +
    optional("--phred_offset", phred_offset) +
    // Join the paths explicitly, same as sample_ids: handing the List itself to
    // optional() would presumably render it through toString as "List(...)".
    (input match {
      case l: List[_] if l.nonEmpty => optional("-i", l.mkString(","))
      case _                        => ""
    }) +
    optional("-o", outputDir)
}
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.gears
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.extensions.qiime.{ AssignTaxonomy, PickRepSet, PickOtus }
import nl.lumc.sasc.biopet.core.{SampleLibraryTag, BiopetQScript}
import nl.lumc.sasc.biopet.extensions.qiime.{SplitLibrariesFastq, AssignTaxonomy, PickRepSet, PickOtus}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvan_thof on 12/4/15.
*/
class GearsQiimeRtax(val root: Configurable) extends QScript with BiopetQScript {
class GearsQiimeRtax(val root: Configurable) extends QScript with BiopetQScript with SampleLibraryTag {
var fastaR1: File = _
var fastqR1: File = _
var fastaR2: Option[File] = None
var fastqR2: Option[File] = None
override def fixedValues = Map(
"assigntaxonomy" -> Map(
......@@ -25,25 +25,44 @@ class GearsQiimeRtax(val root: Configurable) extends QScript with BiopetQScript
"otu_picking_method" -> "uclust_ref",
"suppress_new_clusters" -> true,
"enable_rev_strand_match" -> true
),
"splitlibrariesfastq" -> Map(
"barcode_type" -> "not-barcoded"
)
)
def init() = {
require(fastaR1 != null)
require(fastqR1 != null)
}
def biopetScript() = {
val slfR1 = new SplitLibrariesFastq(this)
slfR1.input :+= fastqR1
slfR1.outputDir = new File(outputDir, "split_libraries_fastq_R1")
sampleId.foreach(slfR1.sample_ids :+= _)
add(slfR1)
lazy val slfR2 = fastqR2.map { file =>
val j = new SplitLibrariesFastq(this)
j.input :+= file
j.outputDir = new File(outputDir, "split_libraries_fastq_R2")
sampleId.foreach(j.sample_ids :+= _)
add(j)
j
}
val pickOtus = new PickOtus(this)
pickOtus.inputFasta = fastaR1
pickOtus.inputFasta = slfR1.outputSeqs
pickOtus.outputDir = new File(outputDir, "pick_otus")
add(pickOtus)
val pickRepSet = new PickRepSet(this)
val repSetOutputDir = new File(outputDir, "pick_rep_set")
pickRepSet.inputFile = pickOtus.otusTxt
pickRepSet.fastaInput = Some(fastaR1)
pickRepSet.outputFasta = Some(new File(repSetOutputDir, fastaR1.getName))
pickRepSet.logFile = Some(new File(repSetOutputDir, fastaR1.getName
pickRepSet.fastaInput = Some(slfR1.outputSeqs)
pickRepSet.outputFasta = Some(new File(repSetOutputDir, slfR1.outputSeqs.getName))
pickRepSet.logFile = Some(new File(repSetOutputDir, slfR1.outputSeqs.getName
.stripSuffix(".fasta").stripSuffix(".fa").stripSuffix(".fna") + ".log"))
add(pickRepSet)
......@@ -51,8 +70,8 @@ class GearsQiimeRtax(val root: Configurable) extends QScript with BiopetQScript
assignTaxonomy.outputDir = new File(outputDir, "assign_taxonomy")
assignTaxonomy.jobOutputFile = new File(assignTaxonomy.outputDir, ".assign_taxonomy.out")
assignTaxonomy.inputFasta = pickRepSet.outputFasta.get
assignTaxonomy.read_1_seqs_fp = Some(fastaR1)
assignTaxonomy.read_2_seqs_fp = fastaR2
assignTaxonomy.read_1_seqs_fp = Some(slfR1.outputSeqs)
assignTaxonomy.read_2_seqs_fp = slfR2.map(_.outputSeqs)
add(assignTaxonomy)
}
}
......@@ -96,9 +96,6 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
flexiprep.biopetScript()
addAll(flexiprep.functions)
lazy val fastaR1 = fastqToFasta(flexiprep.fastqR1Qc, outputName + ".R1")
lazy val fastaR2 = flexiprep.fastqR2Qc.map(fastqToFasta(_, outputName + ".R2"))
if (gearsUseKraken) {
val kraken = new GearsKraken(this)
kraken.outputDir = new File(outputDir, "kraken")
......@@ -114,8 +111,8 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
if (gearsUserQiimeRtax) {
val qiimeRatx = new GearsQiimeRtax(this)
qiimeRatx.outputDir = new File(outputDir, "qiime_rtax")
qiimeRatx.fastaR1 = fastaR1
qiimeRatx.fastaR2 = fastaR2
qiimeRatx.fastqR1 = flexiprep.fastqR1Qc
qiimeRatx.fastqR2 = flexiprep.fastqR2Qc
qiimeRatx.init()
qiimeRatx.biopetScript()
addAll(qiimeRatx.functions)
......@@ -135,18 +132,6 @@ class GearsSingle(val root: Configurable) extends QScript with SummaryQScript wi
(if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
(if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++
outputFiles
/**
 * Adds a seqtk job to the pipeline that converts a fastq file to fasta.
 *
 * The job is configured under the name "seqtkseq" with the fixed value
 * "A" -> true (presumably seqtk's option to force fasta output; verify against
 * the SeqtkSeq wrapper) and its output is marked as intermediate.
 *
 * @param file Fastq file to convert
 * @param name Base name of the output; ".fasta" is appended
 * @return The fasta file inside `outputDir` that the job writes to
 */
def fastqToFasta(file: File, name: String): File = {
  val fastaFile = new File(outputDir, name + ".fasta")

  val converter = new SeqtkSeq(this) {
    override def configName = "seqtkseq"
    override def fixedValues = Map("A" -> true)
  }
  converter.input = file
  converter.output = fastaFile
  converter.isIntermediate = true

  add(converter)
  converter.output
}
}
/** This object give a default main method to the pipelines */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment