Skip to content
Snippets Groups Projects
Commit b7521058 authored by bow's avatar bow
Browse files

Merge branch 'develop' into feature-wipereads

parents 9c3639f6 35dba66e
No related branches found
No related tags found
No related merge requests found
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager">
<facet type="scala" name="Scala">
<configuration>
<option name="compilerLibraryLevel" value="Project" />
<option name="compilerLibraryName" value="Maven: org.scala-lang:scala-compiler-bundle:2.11.2" />
<option name="compilerOptions" value="-dependencyfile $MODULE_DIR$/target/.scala_dependencies" />
<option name="languageLevel" value="Scala 2.11" />
<option name="vmOptions" value="" />
</configuration>
</facet>
<facet type="Python" name="Python">
<configuration sdkName="" />
</facet>
</component>
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/scala" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test/scala" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/main/scripts" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="com.twitter:algebird-core_2.10:0.8.1" level="project" />
<orderEntry type="library" name="gatk-protected" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.testng:testng:6.8" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.10" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.beanshell:bsh:2.0b4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.beust:jcommander:1.27" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.yaml:snakeyaml:1.6" level="project" />
<orderEntry type="library" name="Maven: org.scalatest:scalatest_2.11:2.2.1" level="project" />
<orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.2" level="project" />
<orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.11.2" level="project" />
<orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.2" level="project" />
<orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-queue-package-distribution:3.2" level="project" />
<orderEntry type="library" name="Maven: io.argonaut:argonaut_2.11:6.1-M4" level="project" />
<orderEntry type="library" name="Maven: org.scalaz:scalaz-core_2.11:7.1.0" level="project" />
<orderEntry type="library" name="Maven: org.scala-lang.modules:scala-parser-combinators_2.11:1.0.2" level="project" />
<orderEntry type="library" name="Maven: com.github.julien-truffaut:monocle-core_2.11:0.5.0" level="project" />
<orderEntry type="library" name="Maven: org.biojava:biojava3-core:3.1.0" level="project" />
<orderEntry type="library" name="Maven: org.biojava:biojava3-sequencing:3.1.0" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:17.0" level="project" />
<orderEntry type="library" name="Maven: com.baqend:bloom-filter:1.02" level="project" />
<orderEntry type="library" name="Maven: redis.clients:jedis:2.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.2" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.4" level="project" />
<orderEntry type="library" name="Maven: com.github.scopt:scopt_2.10:3.2.0" level="project" />
</component>
</module>
......@@ -25,7 +25,10 @@ object BiopetExecutable {
nl.lumc.sasc.biopet.tools.CheckAllelesVcfInBam,
nl.lumc.sasc.biopet.tools.VcfToTsv,
nl.lumc.sasc.biopet.tools.VcfFilter,
nl.lumc.sasc.biopet.tools.FindRepeatsPacBio)
nl.lumc.sasc.biopet.tools.FindRepeatsPacBio,
nl.lumc.sasc.biopet.tools.BedToInterval,
nl.lumc.sasc.biopet.tools.MpileupToVcf,
nl.lumc.sasc.biopet.tools.FastqSplitter)
)
/**
......
......@@ -8,6 +8,7 @@ import scala.collection.JavaConversions._
import java.io.File
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, CombineGVCFs }
import nl.lumc.sasc.biopet.extensions.picard.AddOrReplaceReadGroups
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Argument }
......@@ -195,6 +196,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
} else throw new IllegalStateException("Readgroup sample and/or library of input bamfile is not correct, file: " + bamFile +
"\nPossible to set 'correct_readgroups' to true on config to automatic fix this")
}
addAll(BamMetrics(this, bamFile, runDir + "metrics/").functions)
libraryOutput.mappedBamFile = bamFile
} else logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
......
......@@ -4,6 +4,7 @@ import htsjdk.samtools.SAMFileReader
import htsjdk.samtools.SAMSequenceRecord
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.PrintWriter
......@@ -27,7 +28,7 @@ class BedToInterval(val root: Configurable) extends BiopetJavaCommandLineFunctio
override def commandLine = super.commandLine + required(input) + required(bamFile) + required(output)
}
object BedToInterval {
object BedToInterval extends ToolCommand {
def apply(root: Configurable, inputBed: File, inputBam: File, outputDir: String): BedToInterval = {
val bedToInterval = new BedToInterval(root)
bedToInterval.input = inputBed
......@@ -44,13 +45,25 @@ object BedToInterval {
return bedToInterval
}
case class Args (inputFile:File = null, outputFile:File = null) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) =>
c.copy(inputFile = x) } text("out is a required file property")
opt[File]('o', "output") required() valueName("<file>") action { (x, c) =>
c.copy(outputFile = x) } text("out is a required file property")
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val writer = new PrintWriter(args(2))
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val writer = new PrintWriter(commandArgs.outputFile)
val inputSam = new SAMFileReader(new File(args(1)))
val inputSam = new SAMFileReader(commandArgs.inputFile)
val refs = for (SQ <- inputSam.getFileHeader.getSequenceDictionary.getSequences.toArray) yield {
val record = SQ.asInstanceOf[SAMSequenceRecord]
writer.write("@SQ\tSN:" + record.getSequenceName + "\tLN:" + record.getSequenceLength + "\n")
......
......@@ -4,6 +4,7 @@ import java.io.{ BufferedInputStream, File, FileInputStream, PrintWriter }
import java.util.zip.GZIPInputStream
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import scala.io.Source
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -19,24 +20,34 @@ class FastqSplitter(val root: Configurable) extends BiopetJavaCommandLineFunctio
override val defaultVmem = "8G"
memoryLimit = Option(4.0)
override def commandLine = super.commandLine + required(input) + repeat(output)
override def commandLine = super.commandLine + required("-I", input) + repeat("-o", output)
}
object FastqSplitter {
object FastqSplitter extends ToolCommand {
case class Args (inputFile:File = null, outputFile:List[File] = Nil) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) =>
c.copy(inputFile = x) } text("out is a required file property")
opt[File]('o', "output") required() unbounded() valueName("<file>") action { (x, c) =>
c.copy(outputFile = x :: c.outputFile) } text("out is a required file property")
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val groupsize = 100
val input = new File(args.head)
val output: Array[PrintWriter] = new Array[PrintWriter](args.tail.size)
for (t <- 1 to args.tail.size) output(t - 1) = new PrintWriter(args(t))
val output = for (file <- commandArgs.outputFile) yield new PrintWriter(file)
val inputStream = {
if (input.getName.endsWith(".gz") || input.getName.endsWith(".gzip")) Source.fromInputStream(
if (commandArgs.inputFile.getName.endsWith(".gz") || commandArgs.inputFile.getName.endsWith(".gzip")) Source.fromInputStream(
new GZIPInputStream(
new BufferedInputStream(
new FileInputStream(input)))).bufferedReader
else Source.fromFile(input).bufferedReader
new FileInputStream(commandArgs.inputFile)))).bufferedReader
else Source.fromFile(commandArgs.inputFile).bufferedReader
}
while (inputStream.ready) {
for (writter <- output) {
......
......@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -50,46 +51,47 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction
} else "") +
super.commandLine +
required("-o", output) +
optional("-minDP", minDP) +
optional("-minAP", minAP) +
optional("-homoFraction", homoFraction) +
optional("-ploidy", ploidy) +
required("-sample", sample) +
optional("--minDP", minDP) +
optional("--minAP", minAP) +
optional("--homoFraction", homoFraction) +
optional("--ploidy", ploidy) +
required("--sample", sample) +
(if (inputBam == null) required("-I", inputMpileup) else "")
}
}
object MpileupToVcf {
var input: File = _
var output: File = _
var sample: String = _
var minDP = 8
var minAP = 2
var homoFraction = 0.8
var ploidy = 2
object MpileupToVcf extends ToolCommand {
case class Args (input:File = null, output:File = null, sample:String = null, minDP:Int = 8, minAP:Int = 2,
homoFraction:Double = 0.8, ploidy:Int = 2) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "input") valueName("<file>") action { (x, c) =>
c.copy(input = x) } text("input, default is stdin")
opt[File]('o', "output") required() valueName("<file>") action { (x, c) =>
c.copy(output = x) } text("out is a required file property")
opt[String]('s', "sample") required() action { (x, c) =>
c.copy(sample = x) }
opt[Int]("minDP") required() action { (x, c) =>
c.copy(minDP = x) }
opt[Int]("minAP") required() action { (x, c) =>
c.copy(minAP = x) }
opt[Double]("homoFraction") required() action { (x, c) =>
c.copy(homoFraction = x) }
opt[Int]("ploidy") required() action { (x, c) =>
c.copy(ploidy = x) }
}
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
import scala.collection.mutable.Map
for (t <- 0 until args.size) {
args(t) match {
case "-I" => input = new File(args(t+1))
case "-o" => output = new File(args(t+1))
case "-minDP" => minDP = args(t+1).toInt
case "-minAP" => minAP = args(t+1).toInt
case "-sample" => sample = args(t+1)
case "-homoFraction" => homoFraction = args(t+1).toDouble
case "-ploidy" => ploidy = args(t+1).toInt
case _ =>
}
}
if (input != null && !input.exists) throw new IllegalStateException("Input file does not exist")
if (output == null) throw new IllegalStateException("Output file not found, use -o")
if (sample == null) throw new IllegalStateException("sample not given, pls use -sample")
if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist")
val writer = new PrintWriter(output)
val writer = new PrintWriter(commandArgs.output)
writer.println("##fileformat=VCFv4.1")
writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")
writer.println("##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">")
......@@ -101,8 +103,8 @@ object MpileupToVcf {
writer.println("##FORMAT=<ID=AFC,Number=A,Type=Integer,Description=\"Alternetive Forward Reads\">")
writer.println("##FORMAT=<ID=ARC,Number=A,Type=Integer,Description=\"Alternetive Reverse Reads\">")
writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">")
writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + sample)
val inputStream = if (input != null) Source.fromFile(input).getLines else Source.stdin.getLines
writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample)
val inputStream = if (commandArgs.input != null) Source.fromFile(commandArgs.input).getLines else Source.stdin.getLines
class Counts(var forward:Int, var reverse:Int)
for (line <- inputStream;
val values = line.split("\t");
......@@ -166,7 +168,7 @@ object MpileupToVcf {
format += ("RFC" -> counts(ref.toUpperCase).forward.toString)
format += ("RRC" -> counts(ref.toUpperCase).reverse.toString)
format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString)
if (reads >= minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= minAP) {
if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= commandArgs.minAP) {
alt += key
format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString))
format += ("AFC" -> ( (if (format.contains("AFC")) format("AFC") + "," else "") + value.forward))
......@@ -180,13 +182,13 @@ object MpileupToVcf {
var left = reads - dels
val gt = ArrayBuffer[Int]()
for (p <- 0 to alt.size if gt.size < ploidy) {
for (p <- 0 to alt.size if gt.size < commandArgs.ploidy) {
var max = -1
for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a
val f = ad(max).toDouble / left
for (a <- 0 to floor(f).toInt if gt.size < ploidy) gt.append(max)
if (f - floor(f) >= homoFraction) {
for (b <- p to ploidy if gt.size < ploidy) gt.append(max)
for (a <- 0 to floor(f).toInt if gt.size < commandArgs.ploidy) gt.append(max)
if (f - floor(f) >= commandArgs.homoFraction) {
for (b <- p to commandArgs.ploidy if gt.size < commandArgs.ploidy) gt.append(max)
}
left -= ad(max)
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment