Commit 658b77f5 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'develop' into feature-mergesvcalls

parents 7e973ac3 b34fc129
......@@ -194,10 +194,11 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map("samples")).keySet
protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r
protected lazy val nameError = " name invalid." +
"Name must have at least 3 characters," +
protected lazy val nameError = "has an invalid name. " +
"Sample names must have at least 3 characters, " +
"must begin and end with an alphanumeric character, " +
"and must not have whitespace."
"and must not have whitespace and special characters. " +
"Dash (-) and underscore (_) are permitted."
/** Runs addAndTrackJobs method for each sample */
final def addSamplesJobs() {
......
......@@ -93,9 +93,9 @@ trait ReportBuilder extends ToolCommand {
private var total = 0
private var _sampleId: Option[String] = None
protected def sampleId = _sampleId
protected[report] def sampleId = _sampleId
private var _libId: Option[String] = None
protected def libId = _libId
protected[report] def libId = _libId
case class ExtFile(resourcePath: String, targetPath: String)
......@@ -152,6 +152,8 @@ trait ReportBuilder extends ToolCommand {
total = ReportBuilder.countPages(indexPage)
logger.info(total + " pages to be generated")
done = 0
logger.info("Generate pages")
val jobs = generatePage(summary, indexPage, cmdArgs.outputDir,
args = pageArgs ++ cmdArgs.pageArgs.toMap ++
......@@ -216,7 +218,7 @@ object ReportBuilder {
protected val engine = new TemplateEngine()
/** Cache of temp file for templates from the classpath / jar */
private var templateCache: Map[String, File] = Map()
private[report] var templateCache: Map[String, File] = Map()
/** This will give the total number of pages including all nested pages */
def countPages(page: ReportPage): Int = {
......
{
"samples": {
"sampleName": {
"libraries": {
"libName": {
}
}
}
}
}
\ No newline at end of file
<%@ var arg: String%>
${arg}
\ No newline at end of file
......@@ -4,13 +4,14 @@ import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.extensions.Md5sum
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QScript
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.language.reflectiveCalls
import scala.collection.mutable.ListBuffer
/**
......@@ -90,6 +91,20 @@ class MultiSampleQScriptTest extends TestNGSuite with Matchers {
script.functions.size shouldBe 1
}
@Test
def testInvalidSampleName: Unit = {
val script = MultiSampleQScriptTest(sample4 :: Nil)
script.init()
script.biopetScript()
val msg = script.getLastLogMessage
msg shouldBe "Sample 'Something.Invalid' has an invalid name. " +
"Sample names must have at least 3 characters, " +
"must begin and end with an alphanumeric character, " +
"and must not have whitespace and special characters. " +
"Dash (-) and underscore (_) are permitted."
}
}
object MultiSampleQScriptTest {
......@@ -120,6 +135,10 @@ object MultiSampleQScriptTest {
"lib3" -> Map("test" -> "3-3")
))))
val sample4 = Map("samples" -> Map("Something.Invalid" -> Map("libraries" -> Map(
"lib1" -> Map("test" -> "4-1")
))))
val child = Map("samples" -> Map("child" -> Map("tags" -> Map(
"gender" -> "male", "father" -> "father", "mother" -> "mother"))))
val father = Map("samples" -> Map("father" -> Map("tags" -> Map("gender" -> "male"))))
......@@ -136,6 +155,11 @@ object MultiSampleQScriptTest {
.foldLeft(Map[String, Any]()) { case (a, b) => ConfigUtils.mergeMaps(a, b) })
val root = null
def getLastLogMessage: String = {
Logging.errors.toList.last.getMessage
}
class Sample(id: String) extends AbstractSample(id) {
class Library(id: String) extends AbstractLibrary(id) {
/** Function that add library jobs */
......
package nl.lumc.sasc.biopet.core.report
import java.io.File
import java.nio.file.Paths
import com.google.common.io.Files
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by pjvanthof on 24/02/16.
*/
class MultisampleReportBuilderTest extends TestNGSuite with Matchers {
private def resourcePath(p: String): String = {
Paths.get(getClass.getResource(p).toURI).toString
}
@Test
def testGeneratePages(): Unit = {
val builder = new MultisampleReportBuilder {
def reportName: String = "test"
def indexPage: ReportPage = ReportPage("Samples" -> generateSamplesPage(Map()) :: Nil, Nil, Map())
def samplePage(sampleId: String, args: Map[String, Any]): ReportPage =
ReportPage("Libraries" -> generateLibraryPage(Map("sampleId" -> Some(sampleId))) :: Nil, Nil, Map())
def libraryPage(sampleId: String, libraryId: String, args: Map[String, Any]) = ReportPage(Nil, Nil, Map())
}
val tempDir = Files.createTempDir()
tempDir.deleteOnExit()
val args = Array("-s", resourcePath("/empty_summary.json"), "-o", tempDir.getAbsolutePath)
builder.main(args)
builder.extFiles.foreach(x => new File(tempDir, "ext" + File.separator + x.targetPath) should exist)
def createFile(path: String*) = new File(tempDir, path.mkString(File.separator))
createFile("index.html") should exist
createFile("Samples", "index.html") should exist
createFile("Samples", "sampleName", "index.html") should exist
createFile("Samples", "sampleName", "Libraries", "index.html") should exist
createFile("Samples", "sampleName", "Libraries", "libName", "index.html") should exist
}
}
package nl.lumc.sasc.biopet.core.report
import java.io.File
import java.nio.file.Paths
import com.google.common.io.Files
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
/**
* Created by pjvanthof on 24/02/16.
*/
class ReportBuilderTest extends TestNGSuite with Matchers {
private def resourcePath(p: String): String = {
Paths.get(getClass.getResource(p).toURI).toString
}
@DataProvider(name = "testGeneratePages")
def generatePageProvider = {
val sample = Array(Some("sampleName"), None)
val lib = Array(Some("libName"), None)
val nested = Array(false, true)
for (s <- sample; l <- lib; n <- nested) yield Array(s, l, n)
}
@Test(dataProvider = "testGeneratePages")
def testGeneratePages(sample: Option[String], lib: Option[String], nested: Boolean): Unit = {
val builder = new ReportBuilder {
def reportName: String = "test"
def indexPage: ReportPage = ReportPage(
(if (nested) "p1" -> ReportPage(Nil, Nil, Map()) :: Nil else Nil), Nil, Map())
}
val tempDir = Files.createTempDir()
tempDir.deleteOnExit()
val args = Array("-s", resourcePath("/empty_summary.json"), "-o", tempDir.getAbsolutePath) ++
sample.map(x => Array("-a", s"sampleId=$x")).getOrElse(Array()) ++
lib.map(x => Array("-a", s"libId=$x")).getOrElse(Array())
builder.main(args)
builder.sampleId shouldBe sample
builder.libId shouldBe lib
builder.extFiles.foreach(x => new File(tempDir, "ext" + File.separator + x.targetPath) should exist)
new File(tempDir, "index.html") should exist
new File(tempDir, "p1" + File.separator + "index.html").exists() shouldBe nested
}
@Test
def testCountPages: Unit = {
ReportBuilder.countPages(ReportPage(Nil, Nil, Map())) shouldBe 1
ReportBuilder.countPages(ReportPage(
"p1" -> ReportPage(Nil, Nil, Map()) :: Nil,
Nil, Map())) shouldBe 2
ReportBuilder.countPages(ReportPage(
"p1" -> ReportPage(Nil, Nil, Map()) :: "p2" -> ReportPage(Nil, Nil, Map()) :: Nil,
Nil, Map())) shouldBe 3
ReportBuilder.countPages(ReportPage(
"p1" -> ReportPage("p1" -> ReportPage(Nil, Nil, Map()) :: Nil, Nil, Map()) :: Nil,
Nil, Map())) shouldBe 3
ReportBuilder.countPages(ReportPage(
"p1" -> ReportPage(Nil, Nil, Map()) :: "p2" -> ReportPage("p1" -> ReportPage(Nil, Nil, Map()) :: Nil, Nil, Map()) :: Nil,
Nil, Map())) shouldBe 4
}
@Test
def testRenderTemplate: Unit = {
ReportBuilder.templateCache = Map()
ReportBuilder.templateCache shouldBe empty
ReportBuilder.renderTemplate("/template.ssp", Map("arg" -> "test")) shouldBe "test"
ReportBuilder.templateCache.size shouldBe 1
ReportBuilder.renderTemplate("/template.ssp", Map("arg" -> "bla")) shouldBe "bla"
ReportBuilder.templateCache.size shouldBe 1
}
}
package nl.lumc.sasc.biopet.core.report
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by pjvanthof on 24/02/16.
*/
class ReportSectionTest extends TestNGSuite with Matchers {
@Test
def testSectionRender: Unit = {
ReportSection("/template.ssp", Map("arg" -> "test")).render() shouldBe "test"
ReportSection("/template.ssp").render(Map("arg" -> "test")) shouldBe "test"
}
}
......@@ -25,7 +25,7 @@ class GvcfToBed(val root: Configurable) extends ToolCommandFunction {
var minQuality: Int = 0
@Argument(doc = "inverse", required = false)
var inverse: Boolean = false
var inverse: Option[File] = None
override def defaultCoreMemory = 4.0
......@@ -35,7 +35,7 @@ class GvcfToBed(val root: Configurable) extends ToolCommandFunction {
required("-O", outputBed) +
optional("-S", sample) +
optional("--minGenomeQuality", minQuality) +
conditional(inverse, "--inverted")
optional("--invertedOutputBed", inverse)
}
}
......@@ -15,10 +15,10 @@
*/
package nl.lumc.sasc.biopet.tools
import java.io.{ PrintWriter, File }
import java.io.{ File, PrintWriter }
import htsjdk.samtools.fastq.{ FastqReader, FastqRecord }
import nl.lumc.sasc.biopet.utils.{ ToolCommand, ConfigUtils }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, ToolCommand }
import scala.collection.JavaConverters._
import scala.collection.immutable.Map
......@@ -26,7 +26,8 @@ import scala.collection.mutable
import scala.language.postfixOps
/**
* Created by pjvanthof on 11/09/15.
* Created by wyleung on 01/12/14.
* Modified by pjvanthof and warindrarto on 27/06/2015
*/
object SeqStat extends ToolCommand {
......@@ -41,12 +42,12 @@ object SeqStat extends ToolCommand {
var nucs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
// generate the baseHistogram and readHistogram
var baseHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var readHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var baseQualHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var readQualHistogram: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer()
var nucleotideHistoMap: mutable.Map[Char, Long] = mutable.Map()
private var baseQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
private var readQualHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
private var readQualGTEHistoMap: mutable.Map[Int, Long] = mutable.Map(0 -> 0)
case class Args(fastq: File = null, outputJson: Option[File] = None) extends AbstractArgs
......@@ -83,14 +84,15 @@ object SeqStat extends ToolCommand {
*/
def detectPhredEncoding(quals: mutable.ArrayBuffer[Long]): Unit = {
// substract 1 on high value, because we start from index 0
val l_qual = quals.takeWhile(_ == 0).length
val h_qual = quals.length - 1
val qual_low_boundery = quals.takeWhile(_ == 0).length
val qual_high_boundery = quals.length - 1
(l_qual < 59, h_qual > 74) match {
(qual_low_boundery < 59, qual_high_boundery > 74) match {
case (false, true) => phredEncoding = Solexa
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the h_qual exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// TODO: check this later on
// complex case, we cannot tell wheter this is a sanger or solexa
// but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
// New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
case (true, true) => phredEncoding = Solexa
// this is definite a sanger sequence, the lower end is sanger only
case (true, false) => phredEncoding = Sanger
......@@ -102,16 +104,18 @@ object SeqStat extends ToolCommand {
// 'nuc' are the nucleotides 'ACTGN', the max ASCII value for this is T, pre-init the ArrayBuffer to this value
// as we don't expect the have other 'higher' numbered Nucleotides for now.
case class BaseStat(qual: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer(),
nuc: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill('T'.toInt + 1)(0))
nucs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill('T'.toInt + 1)(0))
case class ReadStat(qual: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer(),
nuc: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill('T'.toInt + 1)(0),
nucs: mutable.ArrayBuffer[Long] = mutable.ArrayBuffer.fill('T'.toInt + 1)(0),
var withN: Long = 0L,
lengths: mutable.ArrayBuffer[Int] = mutable.ArrayBuffer())
val baseStats: mutable.ArrayBuffer[BaseStat] = mutable.ArrayBuffer()
val readStats: ReadStat = new ReadStat()
var readLengthHistogram: mutable.Map[String, Long] = mutable.Map.empty
/**
* Compute the quality metric per read
* Results are stored in baseStats and readStats
......@@ -130,25 +134,22 @@ object SeqStat extends ToolCommand {
readStats.lengths ++= mutable.ArrayBuffer.fill(record.length - readStats.lengths.length + 1)(0)
}
val readQual = record.getBaseQualityString
val readQuality = record.getBaseQualityString
val readNucleotides = record.getReadString
if (record.length >= readStats.lengths.size) // Extends array when length not yet possible
(0 to (record.length - readStats.lengths.size)).foreach(_ => readStats.lengths.append(0))
readStats.lengths(record.length) += 1
for (t <- 0 until record.length()) {
if (baseStats(t).qual.length <= readQual(t)) {
baseStats(t).qual ++= mutable.ArrayBuffer.fill(readQual(t).toInt - baseStats(t).qual.length + 1)(0)
if (baseStats(t).qual.length <= readQuality(t)) {
baseStats(t).qual ++= mutable.ArrayBuffer.fill(readQuality(t).toInt - baseStats(t).qual.length + 1)(0)
}
baseStats(t).qual(readQual(t)) += 1
baseStats(t).nuc(readNucleotides(t)) += 1
readStats.nuc(readNucleotides(t)) += 1
baseStats(t).qual(readQuality(t)) += 1
baseStats(t).nucs(readNucleotides(t)) += 1
readStats.nucs(readNucleotides(t)) += 1
}
// implicit conversion to Int using foldLeft(0)
val avgQual: Int = readQual.sum / readQual.length
val avgQual: Int = readQuality.sum / readQuality.length
if (readStats.qual.length <= avgQual) {
readStats.qual ++= mutable.ArrayBuffer.fill(avgQual - readStats.qual.length + 1)(0)
}
......@@ -179,74 +180,53 @@ object SeqStat extends ToolCommand {
if (quals.length <= baseStats(pos).qual.length) {
quals ++= mutable.ArrayBuffer.fill(baseStats(pos).qual.length - quals.length)(0)
}
if (nucs.length <= baseStats(pos).nuc.length) {
for (_ <- nucs.length until baseStats(pos).nuc.length) nucs.append(0)
if (nucs.length <= baseStats(pos).nucs.length) {
nucs ++= mutable.ArrayBuffer.fill( baseStats(pos).nucs.length - nucs.length )(0)
}
// count into the quals
baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value }
// count N into nucs
baseStats(pos).nuc.zipWithIndex foreach { case (value, index) => nucs(index) += value }
baseStats(pos).nucs.zipWithIndex foreach { case (value, index) => nucs(index) += value }
}
detectPhredEncoding(quals)
logger.debug("Detected '" + phredEncoding.toString.toLowerCase + "' encoding in fastq file ...")
for (pos <- nucs.indices) {
// always export the N-nucleotide
if (nucs(pos) > 0 || pos.toChar == 'N') {
nucleotideHistoMap += (pos.toChar -> nucs(pos))
}
}
nucleotideHistoMap = nucs.toList
.foldLeft(mutable.Map[Char, Long]())(
(output, nucleotideCount) => output + (output.size.toChar -> nucleotideCount)
)
// ensure bases: `ACTGN` is always reported even having a zero count.
// Other chars might be counted also, these are also reported
.retain((nucleotide, count) => (count > 0 || "ACTGN".contains(nucleotide.toString)))
// init baseHistogram with the bounderies of the report values
for (pos <- 0 until reportValues.max + 1) {
baseHistogram.append(0)
readHistogram.append(0)
}
baseQualHistogram = quals.slice(phredEncoding.id, quals.size)
baseQualHistogram ++= mutable.ArrayBuffer.fill(reportValues.max + 1 - baseQualHistogram.size)(0L)
for (pos <- quals.indices) {
val key: Int = pos - phredEncoding.id
if (key >= 0) {
baseHistogram(key) += quals(pos)
}
}
readQualHistogram = readStats.qual.slice(phredEncoding.id, readStats.qual.size)
readQualHistogram ++= mutable.ArrayBuffer.fill(reportValues.max + 1 - readQualHistogram.size)(0L)
for (pos <- readStats.qual.indices) {
val key: Int = pos - phredEncoding.id
if (key > 0) {
// count till the max of baseHistogram.length
for (histokey <- 0 until key + 1) {
readHistogram(histokey) += readStats.qual(pos)
readQualGTEHistoMap = readQualHistogram.indices
.foldLeft(mutable.Map[Int, Long]())(
(output, index) => {
output + (output.keys.size -> readQualHistogram.slice(index, readQualHistogram.size).sum)
}
}
}
for (pos <- readHistogram.indices) {
readQualHistoMap += (pos -> readHistogram(pos))
}
)
}
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
logger.info("Start seqstat")
seqStat(new FastqReader(commandArgs.fastq))
summarize()
logger.info("Seqstat done")
val report: Map[String, Any] = Map(
def reportMap(fastqPath: File): Map[String, Any] = {
Map(
("files",
Map(
("fastq", Map(
("path", commandArgs.fastq))
("path", fastqPath.getAbsolutePath))
)
)
),
("stats", Map(
("bases", Map(
("num_total", nucleotideHistoMap.values.sum),
("num_qual", baseHistogram.toList),
("num_qual", baseQualHistogram.toList),
("nucleotides", nucleotideHistoMap.toMap)
)),
("reads", Map(
......@@ -254,11 +234,24 @@ object SeqStat extends ToolCommand {
("num_total", readStats.qual.sum),
("len_min", readStats.lengths.takeWhile(_ == 0).length),
("len_max", readStats.lengths.length - 1),
("num_avg_qual_gte", readQualHistoMap.toMap),
("qual_encoding", phredEncoding.toString.toLowerCase)
("num_avg_qual_gte", readQualGTEHistoMap.toMap),
("qual_encoding", phredEncoding.toString.toLowerCase),
("len_histogram", readStats.lengths.toList)
))
))
)
}
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
logger.info("Start seqstat")
seqStat(new FastqReader(commandArgs.fastq))
summarize()
logger.info("Seqstat done")
val report = reportMap(commandArgs.fastq)
commandArgs.outputJson match {
case Some(file) => {
......
......@@ -12,6 +12,8 @@ import org.scalatest.testng.TestNGSuite
import GvcfToBed._
import org.testng.annotations.Test
import scala.io.Source
/**
* Created by ahbbollen on 13-10-15.
*/
......@@ -39,4 +41,46 @@ class GvcfToBedTest extends TestNGSuite with Matchers with MockitoSugar {
VcfUtils.hasMinGenomeQuality(record2, "Sample_102", 3) shouldBe true
VcfUtils.hasMinGenomeQuality(record2, "Sample_102", 99) shouldBe false
}
@Test
def testGvcfToBedOutput = {
val tmp = File.createTempFile("gvcf2bedtest", ".bed")
tmp.deleteOnExit()
val args: Array[String] = Array("-I", unvepped.getAbsolutePath, "-O", tmp.getAbsolutePath, "-S", "Sample_101",
"--minGenomeQuality", "99")
main(args)
Source.fromFile(tmp).getLines().size shouldBe 0
val tmp2 = File.createTempFile("gvcf2bedtest", ".bed")
tmp2.deleteOnExit()
val args2: Array[String] = Array("-I", unvepped.getAbsolutePath, "-O", tmp2.getAbsolutePath, "-S", "Sample_102",
"--minGenomeQuality", "2")
main(args2)
Source.fromFile(tmp2).getLines().size shouldBe 1
}
@Test
def testGvcfToBedInvertedOutput = {
val tmp = File.createTempFile("gvcf2bedtest", ".bed")
val tmp_inv = File.createTempFile("gvcf2bedtest", ".bed")
tmp.deleteOnExit()
tmp_inv.deleteOnExit()
val args: Array[String] = Array("-I", unvepped.getAbsolutePath, "-O", tmp.getAbsolutePath, "-S", "Sample_101",
"--minGenomeQuality", "99", "--invertedOutputBed", tmp_inv.getAbsolutePath)
main(args)