Commit db12161e authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub

Merge branch 'develop' into fix-BIOPET-637

parents 8ca1fd0f 492b52f0
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
......@@ -16,8 +17,13 @@
<%@ var showTable: Boolean = true %>
<%@ var showIntro: Boolean = true%>
<%@ var runId: Int %>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf)
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
#if (showIntro)
<br/>
......@@ -74,12 +80,12 @@
val libs: List[Option[Int]] = (libId, sampleLevel) match {
case (_, true) => List(None)
case (Some(_), _) => List(libId)
case _ => Await.result(summary.getLibraries(sampleId = Some(sample.id), runId = Some(runId)), Duration.Inf).map(x => Some(x.id)).toList
case _ => allLibraries.filter(_.sampleId == sample.id).map(x => Some(x.id)).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample.name}/index.html">${sample.name}</a></td>
#for (libId <- libs)
#{ val libName = libId.map(l => Await.result(summary.getLibraryName(l), Duration.Inf)) }#
#{ val libName = libId.map(l => allLibraries.find(_.id == l).get.name) }#
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample.name}/Libraries/${libName}/index.html">${libName}</a></td> #end
#{
......
......@@ -2,9 +2,8 @@
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
<%@ var summary: Summary %>
<%@ var rootPath: String %>
<%@ var sampleId: Option[String] %>
<%@ var libId: Option[String] = None %>
<%@ var sampleId: Option[Int] %>
<%@ var libId: Option[Int] = None %>
<table class="table">
<tbody>
<tr><th>Pipeline</th><td>BamMetrics</td></tr>
......
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
<%@ var summary: Summary %>
<%@ var sampleId: Option[String] %>
<%@ var libId: Option[String] = None %>
<%@ var rootPath: String %>
<%@ var metricsTag: String = "bammetrics" %>
<%@ var fields: List[String] = List("All", "Mapped", "Duplicates", "MAPQ>30", "MateUnmapped", "Mate on other chr")%>
<table>
<tbody>
#for (field <- fields)
<tr><th>${field}</th><td>
#if (libId.isDefined)
${summary.getLibraryValue(sampleId.get, libId.get, metricsTag, "stats", "bamstats", "flagstats", field)}
#else
${summary.getSampleValue(sampleId.get, metricsTag, "stats", "bamstats", "flagstats", field)}
#end
</td></tr>
#end
</tbody>
</table>
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(java.io.File)
<%@ var summary: Summary %>
<%@ var sampleId: Option[String] %>
<%@ var libId: Option[String] = None %>
<%@ var metricsTag: String = "bammetrics" %>
<table class="table sortable-theme-bootstrap">
<thead><tr>
<th>Path</th>
<th>MD5</th>
</tr></thead>
<tbody>
<tr>
<td>${summary.getValue(sampleId, libId, metricsTag, "files", "pipeline", "bamfile", "path")}</td>
<td>${summary.getValue(sampleId, libId, metricsTag, "files", "pipeline", "bamfile", "md5")}</td>
</tr>
</tbody>
</table>
\ No newline at end of file
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(scala.concurrent.Await)
#import(scala.concurrent.duration.Duration)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
<%@ var summary: SummaryDb %>
......@@ -17,10 +16,14 @@
<%@ var showIntro: Boolean = true%>
<%@ var runId: Int %>
<%@ var fields: List[String] = List("min", "max", "mean", "median", "modal")%>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf)
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
......@@ -74,12 +77,12 @@
val libs: List[Option[Int]] = (libId, sampleLevel) match {
case (_, true) => List(None)
case (Some(_), _) => List(libId)
case _ => Await.result(summary.getLibraries(sampleId = Some(sample.id), runId = Some(runId)), Duration.Inf).map(x => Some(x.id)).toList
case _ => allLibraries.filter(_.sampleId == sample.id).map(x => Some(x.id)).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample.name}/index.html">${sample.name}</a></td>
#for (libId <- libs)
#{ val libName = libId.map(l => Await.result(summary.getLibraryName(l), Duration.Inf)) }#
#{ val libName = libId.map(l => allLibraries.find(_.id == l).get.name) }#
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample.name}/Libraries/${libName}/index.html">${libName}</a></td> #end
#{
......
#import(nl.lumc.sasc.biopet.utils.IoUtils)
#import(org.apache.commons.io.FileUtils)
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
......@@ -18,11 +19,14 @@
<%@ var target: Option[String] %>
<%@ var runId: Int %>
<%@ var fields: List[String] = List("mean", "median", "max", "horizontal", "frac_min_10x", "frac_min_20x", "frac_min_30x", "frac_min_40x", "frac_min_50x") %>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf)
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
<table class="table">
<thead><tr>
<th>sample</th>
......@@ -36,7 +40,7 @@
val libs: List[Option[Int]] = (libId, sampleLevel) match {
case (_, true) => List(None)
case (Some(_), _) => List(libId)
case _ => Await.result(summary.getLibraries(sampleId = Some(sample.id), runId = Some(runId)), Duration.Inf).map(x => Some(x.id)).toList
case _ => allLibraries.filter(_.sampleId == sample.id).map(x => Some(x.id)).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample.name}/index.html">${sample.name}</a></td>
......
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(scala.concurrent.Await)
#import(scala.concurrent.duration.Duration)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
<%@ var summary: SummaryDb %>
......@@ -18,10 +17,14 @@
<%@ var showTable: Boolean = true %>
<%@ var showIntro: Boolean = true%>
<%@ var runId: Int %>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf)
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
......@@ -76,12 +79,12 @@
val libs: List[Option[Int]] = (libId, sampleLevel) match {
case (_, true) => List(None)
case (Some(_), _) => List(libId)
case _ => Await.result(summary.getLibraries(sampleId = Some(sample.id), runId = Some(runId)), Duration.Inf).map(x => Some(x.id)).toList
case _ => allLibraries.filter(_.sampleId == sample.id).map(x => Some(x.id)).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample.name}/index.html">${sample.name}</a></td>
#for (libId <- libs)
#{ val libName = libId.map(l => Await.result(summary.getLibraryName(l), Duration.Inf)) }#
#{ val libName = libId.map(l => allLibraries.find(_.id == l).get.name) }#
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample.name}/Libraries/${libName}/index.html">${libName}</a></td> #end
#{
......
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(scala.concurrent.Await)
#import(scala.concurrent.duration.Duration)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
<%@ var summary: SummaryDb %>
......@@ -17,11 +16,14 @@
<%@ var showIntro: Boolean = true%>
<%@ var runId: Int %>
<%@ var fields: List[String] = List("min", "max", "mean", "median", "modal")%>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf)
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
......@@ -75,12 +77,12 @@
val libs: List[Option[Int]] = (libId, sampleLevel) match {
case (_, true) => List(None)
case (Some(_), _) => List(libId)
case _ => Await.result(summary.getLibraries(sampleId = Some(sample.id), runId = Some(runId)), Duration.Inf).map(x => Some(x.id)).toList
case _ => allLibraries.filter(_.sampleId == sample.id).map(x => Some(x.id)).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample.name}/index.html">${sample.name}</a></td>
#for (libId <- libs)
#{ val libName = libId.map(l => Await.result(summary.getLibraryName(l), Duration.Inf)) }#
#{ val libName = libId.map(l => allLibraries.find(_.id == l).get.name) }#
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample.name}/Libraries/${libName}/index.html">${libName}</a></td> #end
#{
......
#{ //TODO: Need content }#
Todo
\ No newline at end of file
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(scala.concurrent.Await)
#import(scala.concurrent.duration.Duration)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
<%@ var summary: SummaryDb %>
......@@ -18,10 +17,14 @@
<%@ var showTable: Boolean = true %>
<%@ var showIntro: Boolean = true%>
<%@ var runId: Int %>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf)
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
......@@ -66,12 +69,12 @@
val libs: List[Option[Int]] = (libId, sampleLevel) match {
case (_, true) => List(None)
case (Some(_), _) => List(libId)
case _ => Await.result(summary.getLibraries(sampleId = Some(sample.id), runId = Some(runId)), Duration.Inf).map(x => Some(x.id)).toList
case _ => allLibraries.filter(_.sampleId == sample.id).map(x => Some(x.id)).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample}/index.html">${sample}</a></td>
#for (libId <- libs)
#{ val libName = libId.map(l => Await.result(summary.getLibraryName(l), Duration.Inf)) }#
#{ val libName = libId.map(l => allLibraries.find(_.id == l).get.name) }#
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample.name}/Libraries/${libName}/index.html">${libName}</a></td> #end
#{
......
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._)
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(scala.concurrent.Await)
#import(scala.concurrent.duration.Duration)
#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport)
#import(java.io.File)
<%@ var summary: SummaryDb %>
......@@ -17,10 +16,14 @@
<%@ var showIntro: Boolean = true%>
<%@ var runId: Int %>
<%@ var fields: List[String] = List("mean_coverage", "pct_5x", "pct_10x", "pct_15x", "pct_20x", "pct_25x", "pct_30x", "pct_40x", "pct_50x", "pct_60x", "pct_70x", "pct_80x", "pct_90x", "pct_100x")%>
<%@ var allSamples: Seq[Sample] %>
<%@ var allLibraries: Seq[Library] %>
#{
val samples = Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf)
val samples = sampleId match {
case Some(id) => allSamples.filter(_.id == id).toList
case _ => allSamples.toList
}
}#
#if (showIntro)
<br/>
<div class="row">
......@@ -65,12 +68,12 @@
val libs: List[Option[Int]] = (libId, sampleLevel) match {
case (_, true) => List(None)
case (Some(_), _) => List(libId)
case _ => Await.result(summary.getLibraries(sampleId = Some(sample.id), runId = Some(runId)), Duration.Inf).map(x => Some(x.id)).toList
case _ => allLibraries.filter(_.sampleId == sample.id).map(x => Some(x.id)).toList
}
}#
<tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample.name}/index.html">${sample.name}</a></td>
#for (libId <- libs)
#{ val libName = libId.map(l => Await.result(summary.getLibraryName(l), Duration.Inf)) }#
#{ val libName = libId.map(l => allLibraries.find(_.id == l).get.name) }#
#if (libs.head != libId) <tr> #end
#if (!sampleLevel) <td><a href="${rootPath}Samples/${sample.name}/Libraries/${libName}/index.html">${libName}</a></td> #end
#{
......
......@@ -48,9 +48,7 @@ object BammetricsReport extends ReportBuilder {
ReportPage(bamMetricsPage.subPages ::: List(
"Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp"
)), Map()),
"Files" -> ReportPage(List(), List(
"Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bammetricsInputFile.ssp")
), Map())
"Files" -> ReportPage(List(), List(), Map())
), List(
"Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
) ::: bamMetricsPage.sections,
......
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(scala.concurrent.Await)
#import(scala.concurrent.duration.Duration)
<%@ var summary: SummaryDb %>
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
<%@ var rootPath: String %>
<%@ var sampleId: Option[Int] = None %>
<%@ var runId: Int %>
<%@ var allLibraries: Seq[Library] %>
<%@ var allSamples: Seq[Sample] %>
<table class="table">
<thead><tr><th>Libraries</th></tr></thead>
<tbody>
#for (lib <- Await.result(summary.getLibraries(runId = Some(runId)), Duration.Inf))
<tr><td><a href="${rootPath}Samples/${Await.result(summary.getSampleName(lib.sampleId), Duration.Inf)}/Libraries/${lib.name}/index.html">${lib}</a></td></tr>
#{ val libs = sampleId match {
case Some(id) => allLibraries.filter(_.sampleId == id)
case _ => allLibraries
} }#
#for (lib <- libs)
#{ val sampleName: String = allSamples.find(_.id == lib.sampleId).get.name }#
<tr><td><a href="${rootPath}Samples/${sampleName}/Libraries/${lib.name}/index.html">${lib}</a></td></tr>
#end
</tbody>
</table>
\ No newline at end of file
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
#import(scala.concurrent.Await)
#import(scala.concurrent.duration.Duration)
<%@ var summary: SummaryDb %>
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema._)
<%@ var rootPath: String %>
<%@ var runId: Int %>
<%@ var allSamples: Seq[Sample] %>
<table class="table sortable-theme-bootstrap" data-sortable>
<thead><tr><th data-sorted="true" data-sorted-direction="ascending">Sample</th></tr></thead>
<tbody>
#for (sample <- Await.result(summary.getSamples(runId = Some(runId)), Duration.Inf))
#for (sample <- allSamples)
<tr><td><a href="${rootPath}Samples/${sample.name}/index.html">${sample.name}</a></td></tr>
#end
</tbody>
......
......@@ -54,7 +54,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
}
writer.println("set -eubf")
writer.println("set -o pipefail")
lines.foreach(writer.println)
writer.println(this.commandLine)
jobDelayTime.foreach(x => writer.println(s"sleep $x"))
writer.close()
}
......@@ -99,6 +99,8 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
beforeGraph()
internalBeforeGraph()
this.commandDirectory = this.jobOutputFile.getParentFile
super.freezeFieldValues()
}
......
......@@ -38,17 +38,16 @@ class CleverCaller(val parent: Configurable) extends BiopetCommandLineFunction w
@Input(doc = "Reference")
var reference: File = _
protected def cleverOutputDir: File = new File(cleverWorkDir, "work")
var cleverWorkDir: File = _
/** File named `predictions.vcf`, registered as the "Clever VCF output"; built lazily on first access. */
@Output(doc = "Clever VCF output")
lazy val outputvcf: File = {
new File(cleverOutputDir, "predictions.vcf")
new File(cleverWorkDir, "predictions.vcf")
}
/** File named `predictions.raw.txt`, registered as the "Clever raw output"; built lazily on first access. */
@Output(doc = "Clever raw output")
lazy val outputraw: File = {
new File(cleverOutputDir, "predictions.raw.txt")
new File(cleverWorkDir, "predictions.raw.txt")
}
// var T: Option[Int] = config("T", default = defaultThreads)
......@@ -60,13 +59,13 @@ class CleverCaller(val parent: Configurable) extends BiopetCommandLineFunction w
/**
 * Runs the parent `beforeGraph` hook, then validates the work directory and
 * fills in the reference when none has been assigned.
 *
 * @throws Exception when the work directory is null
 */
override def beforeGraph() {
super.beforeGraph()
// Fail early when no work directory has been assigned.
if (cleverOutputDir == null) throw new Exception("Clever :: Workdirectory is not defined")
if (cleverWorkDir == null) throw new Exception("Clever :: Workdirectory is not defined")
// Use the value of referenceFasta() only when no reference was set explicitly.
if (reference == null) reference = referenceFasta()
}
def cmdLine = required(executable) +
" --sorted " +
" --use_xa " +
required("--sorted") +
required("--use_xa") +
optional("-T", threads) +
conditional(f, "-f") +
conditional(a, "-a") +
......@@ -74,7 +73,7 @@ class CleverCaller(val parent: Configurable) extends BiopetCommandLineFunction w
conditional(r, "-r") +
required(input) +
required(reference) +
required(cleverOutputDir)
required(cleverWorkDir)
}
object CleverCaller {
......
......@@ -70,9 +70,9 @@ In the `tags` key inside a sample or library users can supply tags that belong t
The settings config enables a user to alter almost all settings available in the tools used for a given pipeline.
This config file should be written in either JSON or YAML format. It can contain setup settings like:
* references,
* cut offs,
* program modes and memory limits (program specific),
* references
* cut offs
* program modes and memory limits (program specific)
* Whether chunking should be used
* set program executables (if for some reason the user does not want to use the system's default tools)
* One could set global variables containing settings for all tools used in the pipeline or set tool specific options one layer
......@@ -128,9 +128,13 @@ It is also possible to set the `"species"` flag. Again, we will default to `unkn
# More advanced use of config files.
### 4 levels of configuring settings
In biopet, a value of a ConfigNamespace (e.g., "reference_fasta") for a tool or a pipeline can be defined at 4 different levels.
* Level-4: As a fixed value hardcoded in biopet source code
* Level-3: As a user specified value in the user config file
* Level-2: As a system specified value in the global config files. On the LUMC's SHARK cluster, these global config files are located at /usr/local/sasc/config.
* Level-1: As a default value provided in biopet source code.
During execution, the biopet framework will resolve the value for each ConfigNamespace following the order from level-4 to level-1. Hence, a value defined at a higher level will overwrite a value defined at a lower level for the same ConfigNamespace.
......@@ -172,4 +176,4 @@ biopet template Gentrap -o gentrap_config.yml -s gentrap_run.sh
| -o | --outputConfig | Path (**required**) | Name of the config file that gets generated.|
| -s | --outputScript | Path (optional) | Biopet can also output a script that can be directly used for running the pipeline, the call of the pipeline is generated with the config file as input. This parameter sets the name for the script file.|
| -t | --template | Path (optional) | A template file with 2 placeholders *%s* is required for generating the script. The first placeholder will be replaced with the name of the pipeline, the second with the paths to the sample and settings config files. When Biopet has been pre-configured to use the default template file, then setting this parameter is optional. |
| | --expert | | This flag enables the user to configure a more extensive list of parameters for the pipeline. |
\ No newline at end of file
| | --expert | | This flag enables the user to configure a more extensive list of parameters for the pipeline. |
......@@ -49,7 +49,7 @@ All other values should be provided in the config. Specific config values toward
| ---- | ---- | -------- |
| output_dir | Path (**required**) | directory for output files |
| reference_fasta | Path (**required**) | Path to indexed fasta file to be used as reference |
| aligner | String (optional) | Which aligner to use. Defaults to `bwa`. Choose from [`bwa`, `bwa-aln`, `bowtie`, `gsnap`, `tophat`, `stampy`, `star`, `star-2pass`, `hisat2`] |
| aligner | String (optional) | Which aligner to use. Defaults to `bwa`. Choose from [`bwa-mem`, `bwa-aln`, `bowtie`, `bowtie2`, `gsnap`, `tophat`, `stampy`, `star`, `star-2pass`, `hisat2`] |
| skip_flexiprep | Boolean (optional) | Whether to skip the flexiprep QC step (default = False) |
| skip_markduplicates | Boolean (optional) | Whether to skip the Picard Markduplicates step (default = False) |
| skip_metrics | Boolean (optional) | Whether to skip the metrics gathering step (default = False) |
......
......@@ -2,164 +2,140 @@
## Introduction
This pipeline is build for variant calling on NGS data (preferably Illumina data).
It is based on the <a href="https://www.broadinstitute.org/gatk/guide/best-practices" target="_blank">best practices</a>) of GATK in terms of their approach to variant calling.
This pipeline is built for variant calling on NGS data (preferably Illumina data). Part of this pipeline resembles the <a href="https://www.broadinstitute.org/gatk/guide/best-practices" target="_blank">best practices</a> of GATK in terms of their approach to variant calling.
The pipeline accepts ```.fastq & .bam``` files as input.
----
## Tools for this pipeline
## Overview of tools and sub-pipelines for this pipeline
* [Flexiprep for QC](flexiprep.md)
* [Metagenomics analysis](gears.md)
* [Mapping](mapping.md)
* [VEP annotation](toucan.md)
* [CNV analysis](kopisu.md)
* <a href="http://broadinstitute.github.io/picard/" target="_blank">Picard tool suite</a>
* [Flexiprep](flexiprep.md)
* <a href="https://www.broadinstitute.org/gatk/" target="_blank">GATK tools</a>:
* GATK
* Freebayes
* Bcftools
* Samtools
* <a href="https://www.broadinstitute.org/gatk/" target="_blank">GATK tools</a>
* <a href="https://github.com/ekg/freebayes" target="_blank">Freebayes</a>
* <a href="http://dkoboldt.github.io/varscan/" target="_blank">Varscan</a>
* <a href="https://samtools.github.io/bcftools/bcftools.html" target="_blank">Bcftools</a>
* <a href="http://www.htslib.org/" target="_blank">Samtools</a>
----
## Example
## Basic usage
Note that one should first create the appropriate [configs](../general/config.md).
Note that one should first create the appropriate sample and pipeline setting [configs](../general/config.md).
### Sample input extensions
Shiva pipeline can start from FASTQ or BAM files. This pipeline will include pre-process steps for the BAM files.
Please refer to [our mapping pipeline](mapping.md) for information about how the input samples should be handled.
Shiva is a special pipeline in the sense that it can also start directly from `bam` files. Note that one should alter the sample config field from `R1` into `bam`.
### Full pipeline
The full pipeline can start from fastq or from bam file. This pipeline will include pre-process steps for the bam files.
When using BAM files as input, note that one should change the sample config field from `R1` to `bam`.
To view the help menu, execute:
~~~
biopet pipeline shiva -h
Arguments for Shiva:
-sample,--onlysample <onlysample> Only Sample
-config,--config_file <config_file> JSON config file(s)
-DSC,--disablescatterdefault Disable all scatters
~~~
To run the pipeline:
~~~
biopet pipeline shiva -config MySamples.json -config MySettings.json -run
~~~
A dry run can be performed by simply removing the `-run` flag from the command line call.
[Gears](gears) is run automatically for the data analysed with `Shiva`. There are two levels on which this can be done and this should be specified in the [config](../general/config) file:
* `mapping_to_gears: unmapped` : Unmapped reads after alignment. (default)
* `mapping_to_gears: all` : Trimmed and clipped reads from [Flexiprep](flexiprep).
* `mapping_to_gears: none` : Disable this functionality.
### Only variant calling
It is possible to run Shiva while only performing its variant calling steps.
This has been separated in its own pipeline named `shivavariantcalling`.
As this calling pipeline starts from BAM files, it will naturally not perform any pre-processing steps.
To view the help menu, execute:
~~~
java -jar </path/to/biopet.jar> pipeline shivavariantcalling -h
Arguments for ShivaVariantcalling:
-BAM,--inputbams <inputbams> Bam files (should be deduped bams)
-sample,--sampleid <sampleid> Sample ID (only effects summary and not required)
-library,--libid <libid> Library ID (only effects summary and not required)
-config,--config_file <config_file> JSON config file(s)
-s,--sample <sample> Only Process This Sample
-config,--config_file <config_file> JSON / YAML config file(s)
-cv,--config_value <config_value> Config values, value should be formatted like 'key=value' or
'namespace:namespace:key=value'
-DSC,--disablescatter Disable all scatters
~~~
To run the pipeline:
~~~
biopet pipeline shivavariantcalling -config MySettings.json -run
biopet pipeline shiva -config MySamples.yml -config MySettings.yml -run
~~~
A dry run can be performed by simply removing the `-run` flag from the command line call.
An example of a MySettings.yml file is provided here, and more detailed config options are explained in [config options](#config-options).
``` yaml
samples:
SampleID:
libraries:
lib_id_1:
bam: YourBam.bam
lib_id_2:
R1: file_R1.fq.gz
R2: file_R2.fq.gz
species: H.sapiens
reference_name: GRCh38_no_alt_analysis_set
dbsnp_vcf: <dbsnp.vcf.gz>
vcffilter:
min_alternate_depth: 1
output_dir: <output directory>
variantcallers:
- haplotypecaller
- unifiedgenotyper
- haplotypecaller_gvcf
unifiedgenotyper:
merge_vcf_results: false # This will do the variant calling, but the results will not be merged into the final vcf file
```
----
## Variant caller
## Supported variant callers
At this moment the following variant callers can be used
* <a href="https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php">haplotypecaller</a>
* Running default HaplotypeCaller
* <a href="https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php">haplotypecaller_gvcf</a>
* Running HaplotypeCaller in gvcf mode
* <a href="https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php">haplotypecaller_allele</a>
* Only genotype a given list of alleles with HaplotypeCaller
* <a href="https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_genotyper_UnifiedGenotyper.php">unifiedgenotyper</a>
* Running default UnifiedGenotyper
* <a href="https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_genotyper_UnifiedGenotyper.php">unifiedgenotyper_allele</a>
* Only genotype a given list of alleles with UnifiedGenotyper
* <a href="https://samtools.github.io/bcftools/bcftools.html">bcftools</a>
* <a href="https://samtools.github.io/bcftools/bcftools.html">bcftools_singlesample</a>
* <a href="https://github.com/ekg/freebayes">freebayes</a>