Commit 900ba26b authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Titan script unmodified example

parent 9dd50dc8
......@@ -22,6 +22,8 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.util.matching.Regex
/** Extension for Kraken */
class Kraken(val root: Configurable) extends BiopetCommandLineFunction {
......@@ -49,7 +51,7 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction {
var paired: Boolean = config("paired", default = false)
executable = config("exe", default = "kraken")
override val versionRegex = """Kraken version (.*)""".r
override val versionRegex = """Kraken version ([\d\w\-\.]+)\n.*""".r
override val versionExitcode = List(0, 1)
override val defaultCoreMemory = 8.0
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.sambamba
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
 * Extension for `sambamba view`.
 *
 * Builds a `sambamba view` command line that reads [[input]], applies the
 * optional [[filter]] expression and writes the result to [[output]].
 * The executable itself is taken from the parent `Sambamba` extension.
 *
 * @param root the configurable this extension reads its settings from
 */
class SambambaView(val root: Configurable) extends Sambamba {
  override val defaultThreads = 2

  @Input(doc = "Bam File")
  var input: File = _

  @Output(doc = "output File")
  var output: File = _

  /** Filter expression passed to `--filter`; the flag is left out when this is `None`. */
  var filter: Option[String] = None

  /** Output format passed to `--format`; read from config key "format" (default "bam"). */
  val format: Option[String] = config("format", default = "bam")

  /** Optional regions file passed to `--regions`; read from config key "regions". */
  val regions: Option[File] = config("regions")

  /** Compression level passed to `--compression-level`; read from config key "compression_level" (default 6). */
  val compression_level: Option[Int] = config("compression_level", default = 6)

  /** Returns command to execute */
  def cmdLine = required(executable) +
    required("view") +
    optional("--filter", filter) +
    optional("--nthreads", nCoresRequest) +
    // Pass the Option itself (as done for regions/compression_level) instead of
    // calling .get, so a missing value drops the flag rather than throwing.
    optional("--format", format) +
    optional("--regions", regions) +
    optional("--compression-level", compression_level) +
    // Flag and value must be separate arguments; string concatenation would glue
    // the path onto the flag name. sambamba view's long option is --output-filename.
    required("--output-filename", output) +
    required(input)
}
......@@ -24,8 +24,10 @@ import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.kraken.{ KrakenReport, Kraken }
import nl.lumc.sasc.biopet.extensions.picard.{ MergeSamFiles, AddOrReplaceReadGroups, SamToFastq, MarkDuplicates }
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.tools.FastqSync
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function.QFunction
import scala.collection.JavaConversions._
......@@ -277,30 +279,39 @@ trait GearsTrait extends MultiSampleQScript with SummaryQScript { qscript =>
bamMetrics.biopetScript
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
} else {
}
// sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam>
val samFilterUnmapped = new SambambaView(qscript)
samFilterUnmapped.input = alnFile
samFilterUnmapped.filter = Some("unmapped or mate_is_unmapped")
samFilterUnmapped.output = createFile(".unmapped.bam")
samFilterUnmapped.isIntermediate = true
qscript.add(samFilterUnmapped)
// start bam to fastq (only on unaligned reads); also extract the mates
val samToFastq = SamToFastq(qscript, alnFile,
new File(sampleDir, sampleId + ".R1.fastq"),
new File(sampleDir, sampleId + ".R2.fastq"))
createFile(".unmap.R1.fastq"),
createFile(".unmap.R2.fastq")
)
samToFastq.isIntermediate = true
qscript.add(samToFastq)
// start fastq to fasta
// start fasta cleaner (dedup) fastq toolkit?
// sync the fastq records
val fastqsync = new FastqSync(qscript)
fastqsync.refFastq = samToFastq.fastqR1
fastqsync.inputFastq1 = samToFastq.fastqR1
fastqsync.inputFastq2 = samToFastq.fastqR2
fastqsync.outputFastq1 = createFile(".unmapsynced.R1.fastq.gz")
fastqsync.outputFastq2 = createFile(".unmapsynced.R2.fastq.gz")
fastqsync.outputStats = createFile(".syncstats.json")
qscript.add(fastqsync)
// start kraken
val krakenAnalysis = new Kraken(qscript)
krakenAnalysis.input = List(samToFastq.fastqR1, samToFastq.fastqR2)
// krakenAnalysis.inputFastQ = true
krakenAnalysis.input = List(fastqsync.outputFastq1, fastqsync.outputFastq2)
krakenAnalysis.output = createFile(".krkn.raw")
krakenAnalysis.paired = true
krakenAnalysis.classified_out = Option(createFile(".krkn.classified.fastq"))
krakenAnalysis.unclassified_out = Option(createFile(".krkn.unclassified.fastq"))
qscript.add(krakenAnalysis)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment