Skip to content
Snippets Groups Projects
Commit 3a0b6423 authored by bow's avatar bow
Browse files

Merge branch 'feature-toolcommands' into 'develop'

Feature toolcommands

Added 4 tools from Sage
see also #67

See merge request !19
parents 063c377b 6f44aea3
No related branches found
No related tags found
No related merge requests found
......@@ -28,7 +28,11 @@ object BiopetExecutable {
nl.lumc.sasc.biopet.tools.FindRepeatsPacBio,
nl.lumc.sasc.biopet.tools.BedToInterval,
nl.lumc.sasc.biopet.tools.MpileupToVcf,
nl.lumc.sasc.biopet.tools.FastqSplitter)
nl.lumc.sasc.biopet.tools.FastqSplitter,
nl.lumc.sasc.biopet.tools.BedtoolsCoverageToCounts,
nl.lumc.sasc.biopet.tools.SageCountFastq,
nl.lumc.sasc.biopet.tools.SageCreateLibrary,
nl.lumc.sasc.biopet.tools.SageCreateTagCounts)
)
/**
......@@ -37,7 +41,7 @@ object BiopetExecutable {
def main(args: Array[String]): Unit = {
def toBulletedList(m: List[MainCommand], kind: String = "", bullet: String = "-") =
"Available %ss:\n ".format(kind) + bullet + " " + m.map(x => x.commandName).sorted.mkString("\n " + bullet + " ")
"Available %s(s):\n ".format(kind) + bullet + " " + m.map(x => x.commandName).sorted.mkString("\n " + bullet + " ")
def usage(module: String = null): String = {
if (module != null) checkModule(module)
......
......@@ -8,7 +8,11 @@ import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.scripts.PrefixFastq
import nl.lumc.sasc.biopet.tools.BedtoolsCoverageToCounts
import nl.lumc.sasc.biopet.scripts.SquishBed
import nl.lumc.sasc.biopet.tools.SageCountFastq
import nl.lumc.sasc.biopet.tools.SageCreateLibrary
import nl.lumc.sasc.biopet.tools.SageCreateTagCounts
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function._
......@@ -64,7 +68,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
}
if (tagsLibrary == null) {
val cdl = new CreateDeepsageLibrary(this)
val cdl = new SageCreateLibrary(this)
cdl.input = transcriptome
cdl.output = outputDir + "taglib/tag.lib"
cdl.noAntiTagsOutput = outputDir + "taglib/no_antisense_genes.txt"
......@@ -181,12 +185,12 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
}
def addTablibCounts(fastq:File, outputPrefix: String, outputDir: String) {
val countFastq = new CountFastq(this)
val countFastq = new SageCountFastq(this)
countFastq.input = fastq
countFastq.output = outputDir + outputPrefix + ".raw.counts"
add(countFastq)
val createTagCounts = new CreateTagCounts(this)
val createTagCounts = new SageCreateTagCounts(this)
createTagCounts.input = countFastq.output
createTagCounts.tagLib = tagsLibrary
createTagCounts.countSense = outputDir + outputPrefix + ".tagcount.sense.counts"
......
......@@ -25,7 +25,7 @@ class BedToInterval(val root: Configurable) extends BiopetJavaCommandLineFunctio
override val defaultVmem = "8G"
memoryLimit = Option(4.0)
override def commandLine = super.commandLine + required(input) + required(bamFile) + required(output)
override def commandLine = super.commandLine + required("-I", input) + required("-b", bamFile) + required("-o", output)
}
object BedToInterval extends ToolCommand {
......@@ -45,13 +45,15 @@ object BedToInterval extends ToolCommand {
return bedToInterval
}
case class Args (inputFile:File = null, outputFile:File = null) extends AbstractArgs
case class Args (inputFile:File = null, outputFile:File = null, bamFile:File = null) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) =>
c.copy(inputFile = x) } text("out is a required file property")
c.copy(inputFile = x) }
opt[File]('o', "output") required() valueName("<file>") action { (x, c) =>
c.copy(outputFile = x) } text("out is a required file property")
c.copy(outputFile = x) }
opt[File]('b', "bam") required() valueName("<file>") action { (x, c) =>
c.copy(bamFile = x) }
}
/**
......@@ -63,7 +65,7 @@ object BedToInterval extends ToolCommand {
val writer = new PrintWriter(commandArgs.outputFile)
val inputSam = new SAMFileReader(commandArgs.inputFile)
val inputSam = new SAMFileReader(commandArgs.bamFile)
val refs = for (SQ <- inputSam.getFileHeader.getSequenceDictionary.getSequences.toArray) yield {
val record = SQ.asInstanceOf[SAMSequenceRecord]
writer.write("@SQ\tSN:" + record.getSequenceName + "\tLN:" + record.getSequenceLength + "\n")
......@@ -72,7 +74,7 @@ object BedToInterval extends ToolCommand {
inputSam.close
val refsMap = Map(refs:_*)
val bedFile = Source.fromFile(args(0))
val bedFile = Source.fromFile(commandArgs.inputFile)
for (
line <- bedFile.getLines;
val split = line.split("\t")
......
package nl.lumc.sasc.biopet.pipelines.sage
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.collection.JavaConversions._
......@@ -27,26 +28,27 @@ class BedtoolsCoverageToCounts(val root: Configurable) extends BiopetJavaCommand
required("-o", output)
}
object BedtoolsCoverageToCounts {
var input: File = _
var output: File = _
object BedtoolsCoverageToCounts extends ToolCommand {
/**
 * Parsed command line options of BedtoolsCoverageToCounts.
 *
 * `input` is the file whose lines are read and `output` is the file the counts are written to.
 * Both default to null until the option parser fills them in.
 */
case class Args (input:File = null, output:File = null) extends AbstractArgs
/** Command line parser for BedtoolsCoverageToCounts: `-I`/`--input` and `-o`/`--output`, both mandatory. */
class OptParser extends AbstractOptParser {
  // File to read; stored in Args.input.
  opt[File]('I', "input").required().valueName("<file>").action { (file, parsed) =>
    parsed.copy(input = file)
  }

  // File to write; declared unbounded, and each occurrence overwrites Args.output.
  opt[File]('o', "output").required().unbounded().valueName("<file>").action { (file, parsed) =>
    parsed.copy(output = file)
  }
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
for (t <- 0 until args.size) {
args(t) match {
case "-I" => input = new File(args(t+1))
case "-o" => output = new File(args(t+1))
case _ =>
}
}
if (input == null || !input.exists) throw new IllegalStateException("Input file not found, use -I")
if (output == null) throw new IllegalStateException("Output file not found, use -o")
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input)
val counts:Map[String, Long] = Map()
for (line <- Source.fromFile(input).getLines) {
for (line <- Source.fromFile(commandArgs.input).getLines) {
val values = line.split("\t")
val gene = values(3)
val count = values(6).toLong
......@@ -56,7 +58,7 @@ object BedtoolsCoverageToCounts {
val sortedCounts:SortedMap[String, Long] = SortedMap(counts.toArray:_*)
val writer = new PrintWriter(output)
val writer = new PrintWriter(commandArgs.output)
for ((seq,count) <- sortedCounts) {
if (count > 0) writer.println(seq + "\t" + count)
}
......
package nl.lumc.sasc.biopet.pipelines.sage
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.biojava3.sequencing.io.fastq.{SangerFastqReader, StreamListener, Fastq}
......@@ -11,7 +12,7 @@ import scala.collection.SortedMap
import scala.collection.mutable.Map
import java.io.FileReader
class CountFastq(val root: Configurable) extends BiopetJavaCommandLineFunction {
class SageCountFastq(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
@Input(doc = "Input fasta", shortName = "input", required = true)
......@@ -28,29 +29,30 @@ class CountFastq(val root: Configurable) extends BiopetJavaCommandLineFunction {
required("-o", output)
}
object CountFastq {
var input: File = _
var output: File = _
object SageCountFastq extends ToolCommand {
/**
 * Parsed command line options of SageCountFastq.
 *
 * `input` is the fastq file that is read and `output` is the file the per-sequence counts are
 * written to. Both default to null until the option parser fills them in.
 */
case class Args (input:File = null, output:File = null) extends AbstractArgs
/** Command line parser for SageCountFastq: `-I`/`--input` and `-o`/`--output`, both mandatory. */
class OptParser extends AbstractOptParser {
  // Fastq file to read; stored in Args.input.
  opt[File]('I', "input").required().valueName("<file>").action { (file, parsed) =>
    parsed.copy(input = file)
  }

  // Counts file to write; declared unbounded, and each occurrence overwrites Args.output.
  opt[File]('o', "output").required().unbounded().valueName("<file>").action { (file, parsed) =>
    parsed.copy(output = file)
  }
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
for (t <- 0 until args.size) {
args(t) match {
case "-I" => input = new File(args(t+1))
case "-o" => output = new File(args(t+1))
case _ =>
}
}
if (input == null || !input.exists) throw new IllegalStateException("Input file not found, use -I")
if (output == null) throw new IllegalStateException("Output file not found, use -o")
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input)
val counts:Map[String, Long] = Map()
val reader = new SangerFastqReader
var count = 0
System.err.println("Reading fastq file: " + input)
val fileReader = new FileReader(input)
logger.info("Reading fastq file: " + commandArgs.input)
val fileReader = new FileReader(commandArgs.input)
reader.stream(fileReader, new StreamListener {
def fastq(fastq:Fastq) {
val seq = fastq.getSequence
......@@ -60,13 +62,13 @@ object CountFastq {
if (count % 1000000 == 0) System.err.println(count + " sequences done")
}
})
System.err.println(count + " sequences done")
logger.info(count + " sequences done")
System.err.println("Sorting")
logger.info("Sorting")
val sortedCounts:SortedMap[String, Long] = SortedMap(counts.toArray:_*)
System.err.println("Writting outputfile: " + output)
val writer = new PrintWriter(output)
logger.info("Writting outputfile: " + commandArgs.output)
val writer = new PrintWriter(commandArgs.output)
for ((seq,count) <- sortedCounts) {
writer.println(seq + "\t" + count)
}
......
package nl.lumc.sasc.biopet.pipelines.sage
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.biojava3.core.sequence.DNASequence
import org.biojava3.core.sequence.io.FastaReaderHelper
......@@ -10,8 +11,9 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.collection.SortedMap
import scala.collection.mutable.{Map, Set}
import scala.collection.JavaConversions._
import scala.util.matching.Regex
class CreateDeepsageLibrary(val root: Configurable) extends BiopetJavaCommandLineFunction {
class SageCreateLibrary(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
@Input(doc = "Input fasta", shortName = "input", required = true)
......@@ -37,22 +39,35 @@ class CreateDeepsageLibrary(val root: Configurable) extends BiopetJavaCommandLin
override def commandLine = super.commandLine +
required("-I", input) +
optional("-tag", tag) +
optional("-length", length) +
optional("-notag", noTagsOutput) +
optional("-noantitag", noAntiTagsOutput) +
optional("--tag", tag) +
optional("--length", length) +
optional("--notag", noTagsOutput) +
optional("--noantitag", noAntiTagsOutput) +
required("-o", output)
}
object CreateDeepsageLibrary {
var tag = "CATG"
var length = 17
var input: File = _
var noTagsOutput: File = _
var noAntiTagsOutput: File = _
var allGenesOutput: File = _
var output: File = _
lazy val tagRegex = (tag + "[CATG]{" + length + "}").r
object SageCreateLibrary extends ToolCommand {
/**
 * Parsed command line options of SageCreateLibrary.
 *
 * `input` is the fasta file that is read and `output` receives the tag library table.
 * `tag` (default "CATG") and `length` (default 17) are combined into the tag regex
 * `tag + "[CATG]{" + length + "}"`.
 * `noTagsOutput`, `noAntiTagsOutput` and `allGenesOutput` are extra gene lists that main only
 * writes when the corresponding file is not null.
 */
case class Args (input:File = null, tag:String = "CATG", length:Int = 17,output:File = null, noTagsOutput:File = null,
noAntiTagsOutput:File = null, allGenesOutput:File = null) extends AbstractArgs
/**
 * Command line parser for SageCreateLibrary.
 *
 * Only `-I`/`--input` and `-o`/`--output` are mandatory. `--tag` and `--length` fall back to the
 * defaults declared in Args, and the three auxiliary gene lists are only written by main when
 * a file was supplied, so none of them may be `required()`.
 *
 * The SageCreateLibrary command line wrapper emits `--notag` and `--noantitag`; those spellings
 * are accepted next to the long names (`--noTagsOutput`, `--noAntiTagsOutput`) so that both a
 * pipeline run and a manual invocation parse. `--allgenes` is accepted as well, matching the
 * flag of the former hand-written argument loop.
 */
class OptParser extends AbstractOptParser {
  opt[File]('I', "input").required().unbounded().valueName("<file>").action { (x, c) =>
    c.copy(input = x) }
  opt[File]('o', "output").required().unbounded().valueName("<file>").action { (x, c) =>
    c.copy(output = x) }

  // Optional: Args already carries the defaults "CATG" and 17.
  opt[String]("tag").unbounded().action { (x, c) =>
    c.copy(tag = x) }
  opt[Int]("length").unbounded().action { (x, c) =>
    c.copy(length = x) }

  // Optional gene lists; each is registered under its long name and under the short spelling.
  opt[File]("noTagsOutput").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(noTagsOutput = x) }
  opt[File]("notag").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(noTagsOutput = x) }
  opt[File]("noAntiTagsOutput").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(noAntiTagsOutput = x) }
  opt[File]("noantitag").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(noAntiTagsOutput = x) }
  opt[File]("allGenesOutput").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(allGenesOutput = x) }
  opt[File]("allgenes").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(allGenesOutput = x) }
}
var tagRegex: Regex = null
var geneRegex = """ENSG[0-9]{11}""".r
val tagGenesMap: Map[String, TagGenes] = Map()
......@@ -73,24 +88,16 @@ object CreateDeepsageLibrary {
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
for (t <- 0 until args.size) {
args(t) match {
case "-I" => input = new File(args(t+1))
case "-tag" => tag = args(t+1)
case "-length" => length = args(t+1).toInt
case "-o" => output = new File(args(t+1))
case "-notag" => noTagsOutput = new File(args(t+1))
case "-noantitag" => noAntiTagsOutput = new File(args(t+1))
case "-allgenes" => allGenesOutput = new File(args(t+1))
case _ =>
}
}
if (input == null || !input.exists) throw new IllegalStateException("Input file not found, use -I")
if (output == null) throw new IllegalStateException("Output file not found, use -o")
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input)
tagRegex = (commandArgs.tag + "[CATG]{" + commandArgs.length + "}").r
var count = 0
System.err.println("Reading fasta file")
val reader = FastaReaderHelper.readFastaDNASequence(input)
val reader = FastaReaderHelper.readFastaDNASequence(commandArgs.input)
System.err.println("Finding tags")
for ((name, seq) <- reader) {
getTags(name, seq)
......@@ -103,7 +110,7 @@ object CreateDeepsageLibrary {
val tagGenesMapSorted:SortedMap[String, TagGenes] = SortedMap(tagGenesMap.toArray:_*)
System.err.println("Writting output files")
val writer = new PrintWriter(output)
val writer = new PrintWriter(commandArgs.output)
writer.println("#tag\tfirstTag\tAllTags\tFirstAntiTag\tAllAntiTags")
for ((tag, genes) <- tagGenesMapSorted) {
val line = tag + "\t" + genes.firstTag.mkString(",") +
......@@ -114,24 +121,24 @@ object CreateDeepsageLibrary {
}
writer.close()
if (noTagsOutput != null) {
val writer = new PrintWriter(noTagsOutput)
if (commandArgs.noTagsOutput != null) {
val writer = new PrintWriter(commandArgs.noTagsOutput)
for (gene <- allGenes if !tagGenes.contains(gene)) {
writer.println(gene)
}
writer.close
}
if (noAntiTagsOutput != null) {
val writer = new PrintWriter(noAntiTagsOutput)
if (commandArgs.noAntiTagsOutput != null) {
val writer = new PrintWriter(commandArgs.noAntiTagsOutput)
for (gene <- allGenes if !antiTagGenes.contains(gene)) {
writer.println(gene)
}
writer.close
}
if (allGenesOutput != null) {
val writer = new PrintWriter(allGenesOutput)
if (commandArgs.allGenesOutput != null) {
val writer = new PrintWriter(commandArgs.allGenesOutput)
for (gene <- allGenes) {
writer.println(gene)
}
......
package nl.lumc.sasc.biopet.pipelines.sage
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source
import scala.collection.mutable.Map
import scala.collection.SortedMap
class CreateTagCounts(val root: Configurable) extends BiopetJavaCommandLineFunction {
class SageCreateTagCounts(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
@Input(doc = "Raw count file", shortName = "input", required = true)
......@@ -35,42 +36,43 @@ class CreateTagCounts(val root: Configurable) extends BiopetJavaCommandLineFunct
override def commandLine = super.commandLine +
required("-I", input) +
required("-taglib", tagLib) +
optional("-sense", countSense) +
optional("-allsense", countAllSense) +
optional("-antisense", countAntiSense) +
optional("-allantisense", countAllAntiSense)
required("--taglib", tagLib) +
optional("--sense", countSense) +
optional("--allsense", countAllSense) +
optional("--antisense", countAntiSense) +
optional("--allantisense", countAllAntiSense)
}
object CreateTagCounts {
var input: File = _
var tagLib: File = _
var countSense: File = _
var countAllSense: File = _
var countAntiSense: File = _
var countAllAntiSense: File = _
object SageCreateTagCounts extends ToolCommand {
/**
 * Parsed command line options of SageCreateTagCounts.
 *
 * `input` is the raw count file (tab separated, name then count) and `tagLib` is the tag
 * library file; main reads both. The four `count*` files are handed to `writeFile` together with
 * the matching count map. All fields default to null until the option parser fills them in.
 */
case class Args (input:File = null, tagLib:File = null, countSense:File = null, countAllSense:File = null,
countAntiSense:File = null, countAllAntiSense:File = null) extends AbstractArgs
/**
 * Command line parser for SageCreateTagCounts.
 *
 * The SageCreateTagCounts command line wrapper emits `--taglib`, `--sense`, `--allsense`,
 * `--antisense` and `--allantisense`. Those spellings are accepted next to the long names
 * (`--tagLib`, `--countSense`, ...) so that both a pipeline run and a manual invocation parse.
 *
 * Because the tag library can arrive under either name, neither spelling is marked
 * `required()`; the final `checkConfig` rejects a command line that supplied none.
 */
class OptParser extends AbstractOptParser {
  opt[File]('I', "input").required().unbounded().valueName("<file>").action { (x, c) =>
    c.copy(input = x) }

  // Tag library: mandatory, enforced by checkConfig below.
  opt[File]('t', "tagLib").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(tagLib = x) }
  opt[File]("taglib").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(tagLib = x) }

  // Optional count outputs, each under its long name and the wrapper's spelling.
  opt[File]("countSense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countSense = x) }
  opt[File]("sense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countSense = x) }
  opt[File]("countAllSense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countAllSense = x) }
  opt[File]("allsense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countAllSense = x) }
  opt[File]("countAntiSense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countAntiSense = x) }
  opt[File]("antisense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countAntiSense = x) }
  opt[File]("countAllAntiSense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countAllAntiSense = x) }
  opt[File]("allantisense").unbounded().valueName("<file>").action { (x, c) =>
    c.copy(countAllAntiSense = x) }

  // Keeps the tag library mandatory even though it may be given under two names.
  checkConfig { c =>
    if (c.tagLib == null) failure("Missing option --tagLib") else success
  }
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
for (t <- 0 until args.size) {
args(t) match {
case "-I" => input = new File(args(t+1))
case "-taglib" => tagLib = new File(args(t+1))
case "-sense" => countSense = new File(args(t+1))
case "-allsense" => countAllSense = new File(args(t+1))
case "-antisense" => countAntiSense = new File(args(t+1))
case "-allantisense" => countAllAntiSense = new File(args(t+1))
case _ =>
}
}
if (input == null || !input.exists) throw new IllegalStateException("Input file not found, use -I")
if (tagLib == null) throw new IllegalStateException("Output file not found, use -o")
if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input)
val rawCounts: Map[String, Long] = Map()
for (line <- Source.fromFile(input).getLines) {
for (line <- Source.fromFile(commandArgs.input).getLines) {
val values = line.split("\t")
val gene = values(0)
val count = values(1).toLong
......@@ -83,7 +85,7 @@ object CreateTagCounts {
val antiSenseCounts: Map[String, Long] = Map()
val allAntiSenseCounts: Map[String, Long] = Map()
for (line <- Source.fromFile(tagLib).getLines if !line.startsWith("#")) {
for (line <- Source.fromFile(commandArgs.tagLib).getLines if !line.startsWith("#")) {
val values = line.split("\t")
val tag = values(0)
val sense = values(1)
......@@ -126,9 +128,9 @@ object CreateTagCounts {
writer.close
}
}
writeFile(countSense, senseCounts)
writeFile(countAllSense, allSenseCounts)
writeFile(countAntiSense, antiSenseCounts)
writeFile(countAllAntiSense, allAntiSenseCounts)
writeFile(commandArgs.countSense, senseCounts)
writeFile(commandArgs.countAllSense, allSenseCounts)
writeFile(commandArgs.countAntiSense, antiSenseCounts)
writeFile(commandArgs.countAllAntiSense, allAntiSenseCounts)
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment