Commit 404dd4c7 authored by Peter van 't Hof, committed by GitHub
Browse files

Merge branch 'develop' into fix-BIOPET-685

parents fa2c04f0 4a02224c
......@@ -21,10 +21,6 @@ node('local') {
}
}
stage('Report Tests') {
junit '*/target/surefire-reports/*.xml'
}
stage('Check git on changes') {
sh 'if [ $(git diff | wc -l) -eq 0 ]; then true; else echo "[ERROR] Git is not clean anymore after build"; git diff; echo "[ERROR] This might be caused by reformated code, if so run maven locally"; false; fi'
}
......
......@@ -54,10 +54,12 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.vcfstats.VcfStats,
nl.lumc.sasc.biopet.tools.VcfToTsv,
nl.lumc.sasc.biopet.tools.ReplaceContigsGtfFile,
nl.lumc.sasc.biopet.tools.ExtractTagsFromGtf,
nl.lumc.sasc.biopet.tools.ReplaceContigsVcfFile,
nl.lumc.sasc.biopet.tools.VcfWithVcf,
nl.lumc.sasc.biopet.tools.VepNormalizer,
nl.lumc.sasc.biopet.tools.WipeReads,
nl.lumc.sasc.biopet.tools.MultiCoverage,
nl.lumc.sasc.biopet.tools.NcbiReportToContigMap,
nl.lumc.sasc.biopet.tools.DownloadNcbiAssembly
)
......
package nl.lumc.sasc.biopet.tools
import java.io.{File, PrintWriter}
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.annotation.Feature
import scala.io.Source
/**
  * Command line tool that extracts the values of selected attribute tags from a GTF file
  * and writes them as a tab-separated table, one row per (optionally filtered) feature.
  *
  * Created by pjvan_thof on 8-6-17.
  */
object ExtractTagsFromGtf extends ToolCommand {

  /**
    * Parsed command line arguments.
    *
    * @param outputFile tab-separated file the extracted tag values are written to
    * @param gtfFile    GTF file that is read
    * @param tags       attribute tags to extract, in the order given on the command line
    * @param feature    when set, only features whose type equals this value are written
    */
  case class Args(outputFile: File = null,
                  gtfFile: File = null,
                  tags: List[String] = Nil,
                  feature: Option[String] = None)
      extends AbstractArgs

  class OptParser extends AbstractOptParser {
    opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(outputFile = x)
    } text "Output file with the extracted tags, tab separated. Mandatory"
    opt[File]('g', "gtfFile") required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(gtfFile = x)
    } text "Input gtf file. Mandatory"
    opt[String]('t', "tag") required () unbounded () valueName "<string>" action { (x, c) =>
      // Append so the output columns follow the order of the -t options.
      c.copy(tags = c.tags ::: x :: Nil)
    } text "Tags to extract"
    opt[String]('f', "feature") unbounded () valueName "<string>" action { (x, c) =>
      c.copy(feature = Some(x))
    } text "Filter for only this feature type"
  }

  /**
    * Entry point: writes a `#`-prefixed header line with the requested tags, followed by one
    * line per non-comment GTF record that passes the feature filter. A tag that is missing on
    * a record is written as ".".
    *
    * @param args the command line arguments
    * @throws IllegalArgumentException when the arguments cannot be parsed
    */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val cmdArgs: Args =
      argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    logger.info("Start")

    val reader = Source.fromFile(cmdArgs.gtfFile)
    try {
      val writer = new PrintWriter(cmdArgs.outputFile)
      // try/finally so both handles are released even when a malformed line makes parsing throw.
      try {
        writer.println(cmdArgs.tags.mkString("#", "\t", ""))
        reader
          .getLines()
          .filter(!_.startsWith("#"))
          .map(Feature.fromLine)
          // An absent filter (None) lets every feature through.
          .filter(f => cmdArgs.feature.forall(_ == f.feature))
          .foreach { f =>
            writer.println(
              cmdArgs.tags.map(tag => f.attributes.get(tag).getOrElse(".")).mkString("\t"))
          }
      } finally writer.close()
    } finally reader.close()

    logger.info("Done")
  }
}
......@@ -17,13 +17,13 @@ object GtfToRefflat extends ToolCommand {
extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('r', "refFlat") required () valueName "<file>" action { (x, c) =>
opt[File]('r', "refFlat") required () unbounded () valueName "<file>" action { (x, c) =>
c.copy(refFlat = x)
} text "Input refFlat file. Mandatory"
opt[File]('g', "gtfFile") required () valueName "<file>" action { (x, c) =>
opt[File]('g', "gtfFile") required () unbounded () valueName "<file>" action { (x, c) =>
c.copy(gtfFile = x)
} text "Output gtf file. Mandatory"
opt[File]('R', "referenceFasta") valueName "<file>" action { (x, c) =>
opt[File]('R', "referenceFasta") unbounded () valueName "<file>" action { (x, c) =>
c.copy(referenceFasta = Some(x))
} text "Reference file"
}
......
package nl.lumc.sasc.biopet.tools
import java.io.{File, PrintWriter}
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.utils.{BamUtils, ToolCommand}
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import scala.collection.JavaConversions._
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
/**
  * Command line tool that reports, for every region of a BED file and every sample found in
  * the given BAM files, the number of aligned bases overlapping that region (or, with
  * `--mean`, that number divided by the region length).
  *
  * Created by pjvanthof on 17/06/2017.
  */
object MultiCoverage extends ToolCommand {

  /**
    * Parsed command line arguments.
    *
    * @param bedFile    BED file with the regions to report on
    * @param bamFiles   BAM files to count bases in
    * @param outputFile tab-separated output file
    * @param mean       when true the base count is divided by the region length
    */
  case class Args(bedFile: File = null,
                  bamFiles: List[File] = Nil,
                  outputFile: File = null,
                  mean: Boolean = false)
      extends AbstractArgs

  class OptParser extends AbstractOptParser {
    // NOTE(review): `maxOccurs 1` is followed by `unbounded ()`; presumably the later call
    // wins in scopt, so repeated -L / -o are accepted and the last value is kept. Confirm.
    opt[File]('L', "bedFile") required () maxOccurs 1 unbounded () valueName "<file>" action {
      (x, c) =>
        c.copy(bedFile = x)
    } text "input bedfile"
    opt[File]('b', "bamFile") required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(bamFiles = x :: c.bamFiles)
    } text "input bam files"
    opt[File]('o', "output") required () maxOccurs 1 unbounded () valueName "<file>" action {
      (x, c) =>
        c.copy(outputFile = x)
    } text "output file"
    // A Unit option is a plain flag, so it carries no value name.
    opt[Unit]("mean") unbounded () action { (_, c) =>
      c.copy(mean = true)
    } text "By default total bases is outputed, enable this option make the output relative to region length"
  }

  /**
    * Entry point. One [[scala.concurrent.Future]] is started per BED region; the results are
    * awaited and written in the order the regions were read.
    *
    * @param args the command line arguments
    * @throws IllegalArgumentException when the arguments cannot be parsed
    */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val cmdargs: Args =
      argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    logger.info("Start")

    // Keyed by sample name, as resolved by BamUtils.sampleBamMap.
    val bamFiles = BamUtils.sampleBamMap(cmdargs.bamFiles)

    val futures = for (region <- BedRecordList.fromFile(cmdargs.bedFile).allRecords)
      yield
        Future {
          val samInterval = region.toSamInterval
          val counts = bamFiles.map {
            case (sampleName, bamFile) =>
              // Each task opens its own reader; it is closed even when the query throws.
              val samReader = SamReaderFactory.makeDefault.open(bamFile)
              val count = try {
                samReader
                  .queryOverlapping(samInterval.getContig, samInterval.getStart, samInterval.getEnd)
                  .foldLeft(0L) {
                    case (bases, samRecord) =>
                      // Clip the alignment span to the queried interval.
                      val start = math.max(samInterval.getStart, samRecord.getAlignmentStart)
                      val end = math.min(samInterval.getEnd, samRecord.getAlignmentEnd + 1)
                      val length = end - start
                      bases + (if (length < 0) 0 else length)
                  }
              } finally samReader.close()

              // The branches deliberately yield whole tuples: the value stays a Long when
              // --mean is off, so it is printed as "40" rather than "40.0".
              if (cmdargs.mean && region.length > 0) sampleName -> (count.toDouble / region.length)
              else if (cmdargs.mean) sampleName -> 0.0
              else sampleName -> count
          }
          region -> counts
        }

    logger.info("Reading bam files")

    val writer = new PrintWriter(cmdargs.outputFile)
    // try/finally so the output file handle is released when a future fails.
    try {
      val samples = bamFiles.keys.toList
      writer.println(s"#contig\tstart\tend\t${samples.mkString("\t")}")

      val total = futures.foldLeft(0) { (done, future) =>
        val (region, counts) = Await.result(future, Duration.Inf)
        writer.println(
          s"${region.chr}\t${region.start}\t${region.end}\t${samples.map(counts).mkString("\t")}")
        val count = done + 1
        if (count % 1000 == 0) logger.info(s"$count regions done")
        count
      }
      logger.info(s"$total regions done")
    } finally writer.close()

    logger.info("Done")
  }
}
......@@ -18,7 +18,7 @@ object ReplaceContigsGtfFile extends ToolCommand {
extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "input") required () valueName "<file>" action { (x, c) =>
opt[File]('I', "input") required () unbounded () valueName "<file>" action { (x, c) =>
c.copy(input = x)
} text "Input gtf file"
opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) =>
......
......@@ -30,7 +30,7 @@ import scala.collection.JavaConversions._
/**
* Created by ahbbollen on 27-8-15.
*/
class MpileupToVcfTest extends TestNGSuite with MockitoSugar with Matchers {
class MpileupToVcfTest extends TestNGSuite with Matchers {
import MpileupToVcf._
private def resourcePath(p: String): String = {
......
package nl.lumc.sasc.biopet.tools
import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.io.Source
/**
  * Tests for [[MultiCoverage]], run against the `rrna02.bed` and `paired01.bam` test resources.
  *
  * Created by pjvanthof on 17/06/2017.
  */
class MultiCoverageTest extends TestNGSuite with Matchers {

  /** Resolves a classpath resource to a file system path. */
  private def resourcePath(p: String): String = {
    Paths.get(getClass.getResource(p).toURI).toString
  }

  /**
    * Runs [[MultiCoverage]] on the shared test resources and returns the lines it wrote.
    *
    * @param extraArgs additional command line arguments appended after the mandatory ones
    * @return the lines of the generated output file
    */
  private def runMultiCoverage(extraArgs: String*): List[String] = {
    val outputFile = File.createTempFile("output.", ".txt")
    outputFile.deleteOnExit()

    MultiCoverage.main(
      Array("-L",
            resourcePath("/rrna02.bed"),
            "-b",
            resourcePath("/paired01.bam"),
            "-o",
            outputFile.getAbsolutePath) ++ extraArgs)

    // Materialise the lines before closing so the file handle is not leaked.
    val source = Source.fromFile(outputFile)
    try source.getLines().toList
    finally source.close()
  }

  @Test
  def testDefault(): Unit = {
    runMultiCoverage() shouldBe List(
      "#contig\tstart\tend\tWipeReadsTestCase",
      "chrQ\t300\t350\t0",
      "chrQ\t350\t400\t0",
      "chrQ\t450\t480\t9",
      "chrQ\t470\t475\t0",
      "chrQ\t1\t200\t40",
      "chrQ\t150\t250\t19"
    )
  }

  @Test
  def testMean(): Unit = {
    runMultiCoverage("--mean") shouldBe List(
      "#contig\tstart\tend\tWipeReadsTestCase",
      "chrQ\t300\t350\t0.0",
      "chrQ\t350\t400\t0.0",
      "chrQ\t450\t480\t0.3",
      "chrQ\t470\t475\t0.0",
      "chrQ\t1\t200\t0.20100502512562815",
      "chrQ\t150\t250\t0.19"
    )
  }
}
......@@ -151,7 +151,7 @@ case class BedRecord(chr: String,
this
}
def toSamInterval = (name, strand) match {
def toSamInterval: Interval = (name, strand) match {
case (Some(name), Some(strand)) => new Interval(chr, start + 1, end, !strand, name)
case (Some(name), _) => new Interval(chr, start + 1, end, false, name)
case _ => new Interval(chr, start + 1, end)
......
......@@ -202,10 +202,11 @@
<plugin>
<groupId>org.antipathy</groupId>
<artifactId>mvn-scalafmt</artifactId>
<version>0.1</version>
<version>0.3_${scalaFMTVersion}</version>
<inherited>false</inherited>
<configuration>
<configLocation>${project.basedir}/.scalafmt.conf</configLocation>
<parameters>--non-interactive --quiet</parameters>
</configuration>
<executions>
<execution>
......@@ -215,20 +216,6 @@
</goals>
</execution>
</executions>
<dependencies>
<!-- https://mvnrepository.com/artifact/com.geirsson/scalafmt-core_2.11 -->
<dependency>
<groupId>com.geirsson</groupId>
<artifactId>scalafmt-core_2.11</artifactId>
<version>${scalaFMTVersion}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.geirsson/scalafmt-cli_2.11 -->
<dependency>
<groupId>com.geirsson</groupId>
<artifactId>scalafmt-cli_2.11</artifactId>
<version>${scalaFMTVersion}</version>
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>pl.project13.maven</groupId>
......
......@@ -25,7 +25,7 @@ class GenotypeGvcfs(val parent: Configurable) extends QScript with BiopetQScript
val maxNumberOfFiles: Int = config("max_number_of_files", default = 10)
def finalGvcfFile = new File(outputDir, s"$namePrefix.gvcf.vcf.gz")
def finalGvcfFile = new File(outputDir, s"$namePrefix.g.vcf.gz")
def finalVcfFile = new File(outputDir, s"$namePrefix.vcf.gz")
/** Init for pipeline */
......
......@@ -57,9 +57,9 @@ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
protected val genotypeGvcfs = new GenotypeGvcfs(this)
override def outputFile: genotypeGvcfs.File = genotypeGvcfs.finalVcfFile
override def outputFile: File = genotypeGvcfs.finalVcfFile
def finalGvcfFile: genotypeGvcfs.File = genotypeGvcfs.finalGvcfFile
def finalGvcfFile: File = genotypeGvcfs.finalGvcfFile
def biopetScript() {
gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment