Commit d1df33e7 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'develop' into feature-docs-0.6.0

parents c5451ac1 09da7b13
......@@ -47,16 +47,24 @@ For BAM files as input one should use a config like this:
``` yaml
samples:
Sample_ID_1:
tags:
gender: male
father: sampleNameFather
mother: sampleNameMother
libraries:
Lib_ID_1:
tags:
key: value
bam: MyFirst.bam
Lib_ID_2:
bam: MySecond.bam
```
Note that there is a tool called [SamplesTsvToJson](../tools/SamplesTsvToJson.md) that enables a user to generate the sample config without any chance of creating a wrongly formatted JSON file.
#### Tags
In the `tags` key inside a sample or library, users can supply tags that belong to that sample/library. These tags will be automatically parsed into the summary of a pipeline.
### The settings config
The settings config enables a user to alter almost all settings available in the tools used for a given pipeline.
......
......@@ -21,12 +21,18 @@ Usage: SamplesTsvToJson [options]
Print version
-i <file> | --inputFiles <file>
Input must be a tsv file, first line is seen as header and must at least have a 'sample' column, 'library' column is optional, multiple files allowed
-t <file> | --tagFiles <file>
-o <file> | --outputFile <file>
~~~
The tool is designed in such a way that a user can provide a TAB separated file (TSV) with sample specific properties, and those will be parsed by the tool as well.
For example: if a user wants to record certain properties, e.g. which treatment a sample got, then the user should provide an extra column called treatment, and the
JSON file is then generated with those properties inside it as well. The order of the columns does not matter.
The tag files work the same way, only the values are placed under the key `tags`.
#### Example
~~~ json
......
......@@ -194,10 +194,11 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map("samples")).keySet
protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r
protected lazy val nameError = " name invalid." +
"Name must have at least 3 characters," +
protected lazy val nameError = "has an invalid name. " +
"Sample names must have at least 3 characters, " +
"must begin and end with an alphanumeric character, " +
"and must not have whitespace."
"and must not have whitespace and special characters. " +
"Dash (-) and underscore (_) are permitted."
/** Runs addAndTrackJobs method for each sample */
final def addSamplesJobs() {
......
......@@ -19,8 +19,8 @@ import java.io.File
import htsjdk.samtools.reference.IndexedFastaSequenceFile
import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, Summarizable }
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable }
import scala.collection.JavaConversions._
......@@ -69,16 +69,40 @@ trait Reference extends Configurable {
/** Returns the fasta file */
def referenceFasta(): File = {
val file: File = config("reference_fasta")
checkFasta(file)
val dict = new File(file.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta").stripSuffix(".fna") + ".dict")
val fai = new File(file.getAbsolutePath + ".fai")
this match {
case c: BiopetCommandLineFunction => c.deps :::= dict :: fai :: Nil
case _ =>
if (config.contains("reference_fasta")) {
checkFasta(file)
val dict = new File(file.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta").stripSuffix(".fna") + ".dict")
val fai = new File(file.getAbsolutePath + ".fai")
this match {
case c: BiopetCommandLineFunction => c.deps :::= dict :: fai :: Nil
case _ =>
}
} else {
val defaults = ConfigUtils.mergeMaps(this.defaults, this.internalDefaults)
/**
 * Collects all (species, reference_name) pairs found under the "references" key of a config map.
 *
 * Values that are not maps (a non-map "references" value, or a species whose content is
 * not a map) are ignored instead of being downcast, so a malformed config yields an empty result
 * rather than a ClassCastException while an error message is being built.
 *
 * @param map config map to inspect
 * @return set of (species, reference_name) pairs
 */
def getReferences(map: Map[String, Any]): Set[(String, String)] = {
  // Entries of a nested map that have a String key; anything that is not a map gives no entries
  def stringKeyed(value: Any): List[(String, Any)] = value match {
    case nested: Map[_, _] => nested.toList.collect { case (key: String, content) => (key, content: Any) }
    case _                 => Nil
  }

  (for {
    (species, speciesContent) <- map.get("references").toList.flatMap(stringKeyed)
    (referenceName, _) <- stringKeyed(speciesContent)
  } yield (species, referenceName)).toSet
}
// Every (species, reference_name) pair known to the merged defaults and to the global config
val references = getReferences(defaults) ++ getReferences(Config.global.map)
if (!references.contains((referenceSpecies, referenceName))) {
  val buffer = new StringBuilder()
  // Sorted so the listing is stable between runs; iteration order of a Set is unspecified
  val forSpecies = references.filter(_._1 == referenceSpecies).toList.sorted
  if (forSpecies.nonEmpty) {
    // The species is known, only the reference name is not: list the names that do exist
    buffer.append(s"Reference: '$referenceName' does not exist in config for species: '$referenceSpecies'")
    buffer.append(s"\nReferences found for species '$referenceSpecies':")
    forSpecies.foreach(x => buffer.append("\n - " + x._2))
  } else {
    // Unknown species: list everything that is available
    buffer.append(s"Species: '$referenceSpecies' does not exist in config")
    if (references.nonEmpty) buffer.append("\n References available in config (species -> reference_name):")
    else buffer.append("\n No references found in user or global config")
    references.toList.sorted.foreach(x => buffer.append(s"\n - ${x._1} -> ${x._2}"))
  }
  Logging.addError(buffer.toString)
}
}
file
}
......@@ -117,6 +141,7 @@ object Reference {
/**
* Raise an exception when given fasta file has no fai file
*
* @param fastaFile Fasta file
*/
def requireFai(fastaFile: File): Unit = {
......@@ -132,6 +157,7 @@ object Reference {
/**
* Raise an exception when given fasta file has no dict file
*
* @param fastaFile Fasta file
*/
def requireDict(fastaFile: File): Unit = {
......
......@@ -24,7 +24,7 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
@Input(doc = "Python script", required = false)
var python_script: File = _
executable = config("exe", default = "python", submodule = "python")
executable = config("exe", default = "python", submodule = "python", freeVar = false)
protected var python_script_name: String = _
......
......@@ -93,9 +93,9 @@ trait ReportBuilder extends ToolCommand {
private var total = 0
private var _sampleId: Option[String] = None
protected def sampleId = _sampleId
protected[report] def sampleId = _sampleId
private var _libId: Option[String] = None
protected def libId = _libId
protected[report] def libId = _libId
case class ExtFile(resourcePath: String, targetPath: String)
......@@ -152,6 +152,8 @@ trait ReportBuilder extends ToolCommand {
total = ReportBuilder.countPages(indexPage)
logger.info(total + " pages to be generated")
done = 0
logger.info("Generate pages")
val jobs = generatePage(summary, indexPage, cmdArgs.outputDir,
args = pageArgs ++ cmdArgs.pageArgs.toMap ++
......@@ -216,7 +218,7 @@ object ReportBuilder {
protected val engine = new TemplateEngine()
/** Cache of temp file for templates from the classpath / jar */
private var templateCache: Map[String, File] = Map()
private[report] var templateCache: Map[String, File] = Map()
/** This will give the total number of pages including all nested pages */
def countPages(page: ReportPage): Int = {
......
......@@ -112,11 +112,15 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
Map("samples" -> q.samples.map {
case (sampleName, sample) =>
sampleName -> Map(
qscript.summaryName -> Map("settings" -> sample.summarySettings),
qscript.summaryName -> Map(
"settings" -> sample.summarySettings,
"tags" -> sample.sampleTags),
"libraries" -> sample.libraries.map {
case (libName, lib) =>
libName -> Map(
qscript.summaryName -> Map("settings" -> lib.summarySettings)
qscript.summaryName -> Map(
"settings" -> lib.summarySettings,
"tags" -> lib.libTags)
)
}
)
......
{
"samples": {
"sampleName": {
"libraries": {
"libName": {
}
}
}
}
}
\ No newline at end of file
<%@ var arg: String%>
${arg}
\ No newline at end of file
......@@ -4,13 +4,14 @@ import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.extensions.Md5sum
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QScript
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.language.reflectiveCalls
import scala.collection.mutable.ListBuffer
/**
......@@ -90,6 +91,20 @@ class MultiSampleQScriptTest extends TestNGSuite with Matchers {
script.functions.size shouldBe 1
}
/** A sample id containing a '.' must end up as the last logged error, with the full explanation */
@Test
def testInvalidSampleName: Unit = {
  val expectedMessage = "Sample 'Something.Invalid' has an invalid name. " +
    "Sample names must have at least 3 characters, " +
    "must begin and end with an alphanumeric character, " +
    "and must not have whitespace and special characters. " +
    "Dash (-) and underscore (_) are permitted."

  val script = MultiSampleQScriptTest(List(sample4))
  script.init()
  script.biopetScript()

  script.getLastLogMessage shouldBe expectedMessage
}
}
object MultiSampleQScriptTest {
......@@ -120,6 +135,10 @@ object MultiSampleQScriptTest {
"lib3" -> Map("test" -> "3-3")
))))
/** Sample config with one library; the sample id contains a '.' and is used by the invalid-name test */
val sample4 = {
  val libraries = Map("lib1" -> Map("test" -> "4-1"))
  Map("samples" -> Map("Something.Invalid" -> Map("libraries" -> libraries)))
}
val child = Map("samples" -> Map("child" -> Map("tags" -> Map(
"gender" -> "male", "father" -> "father", "mother" -> "mother"))))
val father = Map("samples" -> Map("father" -> Map("tags" -> Map("gender" -> "male"))))
......@@ -136,6 +155,11 @@ object MultiSampleQScriptTest {
.foldLeft(Map[String, Any]()) { case (a, b) => ConfigUtils.mergeMaps(a, b) })
val root = null
/** Message of the last element in `Logging.errors`; fails when no error has been registered */
def getLastLogMessage: String = {
  val registered = Logging.errors.toList
  registered.last.getMessage
}
class Sample(id: String) extends AbstractSample(id) {
class Library(id: String) extends AbstractLibrary(id) {
/** Function that add library jobs */
......
package nl.lumc.sasc.biopet.core.report
import java.io.File
import java.nio.file.Paths
import com.google.common.io.Files
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
 * Test for [[MultisampleReportBuilder]]: runs the builder from the command line on a minimal
 * summary and checks that the index, sample and library pages are written.
 *
 * Created by pjvanthof on 24/02/16.
 */
class MultisampleReportBuilderTest extends TestNGSuite with Matchers {

  /**
   * Resolves a classpath resource to a file system path.
   *
   * @param p resource path, e.g. "/empty_summary.json"
   * @throws IllegalArgumentException when the resource is not on the classpath
   */
  private def resourcePath(p: String): String = {
    // getResource returns null for a missing resource; fail with a clear message instead of a NPE
    val url = Option(getClass.getResource(p))
      .getOrElse(throw new IllegalArgumentException(s"Test resource not found on classpath: $p"))
    Paths.get(url.toURI).toString
  }

  /** Deletes a file, or a directory together with everything below it */
  private def deleteRecursively(file: File): Unit = {
    // listFiles returns null for anything that is not a directory
    Option(file.listFiles()).foreach(_.foreach(deleteRecursively))
    file.delete()
  }

  @Test
  def testGeneratePages(): Unit = {
    val builder = new MultisampleReportBuilder {
      def reportName: String = "test"
      def indexPage: ReportPage = ReportPage("Samples" -> generateSamplesPage(Map()) :: Nil, Nil, Map())
      def samplePage(sampleId: String, args: Map[String, Any]): ReportPage =
        ReportPage("Libraries" -> generateLibraryPage(Map("sampleId" -> Some(sampleId))) :: Nil, Nil, Map())
      def libraryPage(sampleId: String, libraryId: String, args: Map[String, Any]) = ReportPage(Nil, Nil, Map())
    }

    val tempDir = Files.createTempDir()
    // File.deleteOnExit cannot remove a non-empty directory, so the output is cleaned up explicitly
    try {
      val args = Array("-s", resourcePath("/empty_summary.json"), "-o", tempDir.getAbsolutePath)
      builder.main(args)

      builder.extFiles.foreach(x => new File(tempDir, "ext" + File.separator + x.targetPath) should exist)

      // Path of an expected output file below the report directory
      def outputFile(path: String*) = new File(tempDir, path.mkString(File.separator))
      outputFile("index.html") should exist
      outputFile("Samples", "index.html") should exist
      outputFile("Samples", "sampleName", "index.html") should exist
      outputFile("Samples", "sampleName", "Libraries", "index.html") should exist
      outputFile("Samples", "sampleName", "Libraries", "libName", "index.html") should exist
    } finally {
      deleteRecursively(tempDir)
    }
  }
}
package nl.lumc.sasc.biopet.core.report
import java.io.File
import java.nio.file.Paths
import com.google.common.io.Files
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
/**
 * Tests for [[ReportBuilder]]: page generation driven from the command line, page counting and
 * template rendering.
 *
 * Created by pjvanthof on 24/02/16.
 */
class ReportBuilderTest extends TestNGSuite with Matchers {

  /**
   * Resolves a classpath resource to a file system path.
   *
   * @param p resource path, e.g. "/empty_summary.json"
   * @throws IllegalArgumentException when the resource is not on the classpath
   */
  private def resourcePath(p: String): String = {
    // getResource returns null for a missing resource; fail with a clear message instead of a NPE
    val url = Option(getClass.getResource(p))
      .getOrElse(throw new IllegalArgumentException(s"Test resource not found on classpath: $p"))
    Paths.get(url.toURI).toString
  }

  /** Deletes a file, or a directory together with everything below it */
  private def deleteRecursively(file: File): Unit = {
    // listFiles returns null for anything that is not a directory
    Option(file.listFiles()).foreach(_.foreach(deleteRecursively))
    file.delete()
  }

  /** All combinations of (sample id present/absent, library id present/absent, nested page yes/no) */
  @DataProvider(name = "testGeneratePages")
  def generatePageProvider: Array[Array[Any]] = {
    val sample = Array(Some("sampleName"), None)
    val lib = Array(Some("libName"), None)
    val nested = Array(false, true)
    for (s <- sample; l <- lib; n <- nested) yield Array(s, l, n)
  }

  @Test(dataProvider = "testGeneratePages")
  def testGeneratePages(sample: Option[String], lib: Option[String], nested: Boolean): Unit = {
    val builder = new ReportBuilder {
      def reportName: String = "test"
      def indexPage: ReportPage = ReportPage(
        (if (nested) "p1" -> ReportPage(Nil, Nil, Map()) :: Nil else Nil), Nil, Map())
    }

    val tempDir = Files.createTempDir()
    // File.deleteOnExit cannot remove a non-empty directory, so the output is cleaned up explicitly
    try {
      val args = Array("-s", resourcePath("/empty_summary.json"), "-o", tempDir.getAbsolutePath) ++
        sample.map(x => Array("-a", s"sampleId=$x")).getOrElse(Array()) ++
        lib.map(x => Array("-a", s"libId=$x")).getOrElse(Array())
      builder.main(args)

      // The -a arguments must have been picked up by the builder
      builder.sampleId shouldBe sample
      builder.libId shouldBe lib

      builder.extFiles.foreach(x => new File(tempDir, "ext" + File.separator + x.targetPath) should exist)
      new File(tempDir, "index.html") should exist
      new File(tempDir, "p1" + File.separator + "index.html").exists() shouldBe nested
    } finally {
      deleteRecursively(tempDir)
    }
  }

  @Test
  def testCountPages(): Unit = {
    // Builds a page holding only the given sub pages; a call without arguments gives a leaf page
    def page(subPages: (String, ReportPage)*): ReportPage = ReportPage(subPages.toList, Nil, Map())

    ReportBuilder.countPages(page()) shouldBe 1
    ReportBuilder.countPages(page("p1" -> page())) shouldBe 2
    ReportBuilder.countPages(page("p1" -> page(), "p2" -> page())) shouldBe 3
    ReportBuilder.countPages(page("p1" -> page("p1" -> page()))) shouldBe 3
    ReportBuilder.countPages(page("p1" -> page(), "p2" -> page("p1" -> page()))) shouldBe 4
  }

  @Test
  def testRenderTemplate(): Unit = {
    // Start from an empty cache so the size checks below are independent of other tests
    ReportBuilder.templateCache = Map()
    ReportBuilder.templateCache shouldBe empty

    ReportBuilder.renderTemplate("/template.ssp", Map("arg" -> "test")) shouldBe "test"
    ReportBuilder.templateCache.size shouldBe 1

    // Rendering the same template again must not add a second cache entry
    ReportBuilder.renderTemplate("/template.ssp", Map("arg" -> "bla")) shouldBe "bla"
    ReportBuilder.templateCache.size shouldBe 1
  }
}
package nl.lumc.sasc.biopet.core.report
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
 * Test for [[ReportSection]]: template arguments can be given at construction or at render time.
 *
 * Created by pjvanthof on 24/02/16.
 */
class ReportSectionTest extends TestNGSuite with Matchers {

  @Test
  def testSectionRender: Unit = {
    val template = "/template.ssp"
    val templateArgs: Map[String, Any] = Map("arg" -> "test")

    // Arguments supplied when the section is created
    val withArgs = ReportSection(template, templateArgs)
    withArgs.render() shouldBe "test"

    // Arguments supplied only when rendering
    val withoutArgs = ReportSection(template)
    withoutArgs.render(templateArgs) shouldBe "test"
  }
}
......@@ -197,7 +197,7 @@ class WriteSummaryTest extends TestNGSuite with Matchers {
object WriteSummaryTest {
def makeWriter(root: Configurable, c: Map[String, Any] = Map()) = new WriteSummary(root) {
override def globalConfig = new Config(c)
override def globalConfig = new Config(c + ("exe" -> "test"))
override def outputs = Seq()
override def inputs = Seq()
qSettings = new QSettings {
......@@ -238,7 +238,7 @@ object WriteSummaryTest {
libId = l
summaryName = "test"
outputDir = new File(".").getAbsoluteFile
override def globalConfig = new Config(c)
override def globalConfig = new Config(c + ("exe" -> "test"))
def summarySettings: Map[String, Any] = settings
def summaryFiles: Map[String, File] = files
val tempFile = File.createTempFile("summary", ".json")
......@@ -256,7 +256,7 @@ object WriteSummaryTest {
new MultiSampleQScript with QScript {
summaryName = "test"
outputDir = new File(".").getAbsoluteFile
override def globalConfig = new Config(c)
override def globalConfig = new Config(c + ("exe" -> "test"))
def summarySettings: Map[String, Any] = settings
def summaryFiles: Map[String, File] = files
val tempFile = File.createTempFile("summary", ".json")
......
#!/usr/bin/env python
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
......@@ -14,12 +15,20 @@
# license, please contact us to obtain a separate license.
#
# Set root logger level to DEBUG and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
from __future__ import print_function
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
__author__="Peter van 't Hof"
import sys
import re
# Matches the IUPAC nucleotide ambiguity codes R, Y, K, M, S, W, B, D, H and V (upper case only)
upacPatern = re.compile(r'[RYKMSWBDHV]')


def mask_upac_fields(fields):
    """Replace ambiguity codes in the fourth column (index 3) with "N".

    :param fields: list of column values of one tab separated line; modified in place
    :return: the same list, for convenience
    """
    # Index 3 only exists when there are at least four columns; the previous
    # guard (len >= 3) raised an IndexError on lines with exactly three columns.
    if len(fields) > 3:
        fields[3] = upacPatern.sub("N", fields[3])
    return fields


if __name__ == "__main__":
    for line in sys.stdin:
        l = mask_upac_fields(line.strip().split("\t"))
        # NOTE(review): every line is echoed, including lines too short to be masked;
        # the original indentation was not visible, so confirm this matches the intent.
        print("\t".join(map(str, l)))
......@@ -33,7 +33,9 @@ if __name__ == "__main__":
"""
for line in sys.stdin:
l = line.strip().split("\t")
if l[3] == "0":
l[2] = upacPatern.sub("N", l[2])
if len(l) < 4 or l[3] == "0":
# no alignment to this position
print("\t".join(map(str, l)))
continue
......@@ -49,5 +51,4 @@ if __name__ == "__main__":
if new_size == 0:
l[5] = ""
l[2] = upacPatern.sub("N", l[2])
print("\t".join(map(str, l)))
......@@ -114,7 +114,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
var fasta: Option[String] = config("fasta")
var sift: Option[String] = config("sift")
var polyphen: Option[String] = config("polyphen")
var custom: Option[String] = config("custom")
var custom: List[String] = config("custom", default = Nil)
var plugin: List[String] = config("plugin", default = Nil)
var individual: Option[String] = config("individual")
var fields: Option[String] = config("fields")
......@@ -227,7 +227,7 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
optional("--fasta", fasta) +
optional("--sift", sift) +
optional("--polyphen", polyphen) +
optional("--custom", custom) +
repeat("--custom", custom) +
repeat("--plugin", plugin) +
optional("--individual", individual) +
optional("--fields", fields) +
......
......@@ -69,8 +69,8 @@ class Kraken(val root: Configurable) extends BiopetCommandLineFunction with Vers
optional("--threads", nCoresRequest) +
conditional(quick, "--quick") +
optional("--min_hits", minHits) +
optional("--unclassified-out ", unclassified_out.get) +
optional("--classified-out ", classified_out.get) +
optional("--unclassified-out ", unclassified_out) +
optional("--classified-out ", classified_out) +
required("--output", output) +
conditional(preLoad, "--preload") +
conditional(paired, "--paired") +
......
......@@ -65,8 +65,10 @@ class AddOrReplaceReadGroups(val root: Configurable) extends Picard {
/** Returns command to execute */
override def cmdLine = super.cmdLine +
required("INPUT=", input, spaceSeparated = false) +
required("OUTPUT=", output, spaceSeparated = false) +
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false)) +
required("SORT_ORDER=", sortOrder, spaceSeparated = false) +
required("RGID=", RGID, spaceSeparated = false) +
required("RGLB=", RGLB, spaceSeparated = false) +
......
......@@ -34,6 +34,9 @@ class ReorderSam(val root: Configurable) extends Picard with Reference {
@Output(doc = "Output SAM or BAM file", required = true)
var output: File = null
/** Index file next to the output: the output path with a ".bam" suffix replaced by ".bai" */
@Output(doc = "The index file (.bai) of the output bam file", required = true)
lazy val outputIndex: File = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai")
@Argument(doc = "Allow incomplete dict concordance", required = false)
var allowIncompleteDictConcordance: Boolean = config("allow_incomplete_dict_concordance", default = false)
......@@ -49,6 +52,8 @@ class ReorderSam(val root: Configurable) extends Picard with Reference {
conditional(allowIncompleteDictConcordance, "ALLOW_INCOMPLETE_DICT_CONCORDANCE=TRUE") +
conditional(allowContigLengthDiscordance, "ALLOW_CONTIG_LENGTH_DISCORDANCE=TRUE") +
required("REFERENCE=", reference, spaceSeparated = false) +
required("INPUT=", input, spaceSeparated = false) +
required("OUTPUT=", output, spaceSeparated = false)
(if (inputAsStdin) required("INPUT=", new File("/dev/stdin"), spaceSeparated = false)
else required("INPUT=", input, spaceSeparated = false)) +
(if (outputAsStsout) required("OUTPUT=", new File("/dev/stdout"), spaceSeparated = false)
else required("OUTPUT=", output, spaceSeparated = false))
}
Markdown is supported