Commit 6fb4feb5 authored by Peter van 't Hof's avatar Peter van 't Hof

Fix tinycap dir

parent d2c438c8
target
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<!-- Maven module descriptor for the TinyCap pipeline. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Inherits from the Biopet parent POM; the module version follows the parent version. -->
<parent>
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.7.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<inceptionYear>2016</inceptionYear>
<artifactId>TinyCap</artifactId>
<dependencies>
<!-- The TinyCap pipeline mixes in MultisampleMappingTrait from the mapping pipelines package
     (presumably shipped by this module - confirm). Kept at the same version as this module. -->
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Mapping</artifactId>
<version>${project.version}</version>
</dependency>
<!-- TinyCap reuses the Gentrap expression measures (BaseCounts, FragmentsPerGene, Measurement)
     and the Gentrap measure_plotreport.ssp report template. Kept at the same version as this module. -->
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Gentrap</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Test-only: TestNG annotations (@Test, @DataProvider, @AfterClass) used by TinyCapTest. -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<!-- Test-only: ScalaTest matchers and the TestNGSuite bridge used by TinyCapTest (Scala 2.10 build). -->
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
#import(nl.lumc.sasc.biopet.utils.summary.Summary)
#import(nl.lumc.sasc.biopet.core.report.ReportPage)
<%@ var summary: Summary %>
<%@ var rootPath: String %>
<%@ var pipeline: String %>
<table class="table">
<tbody>
<tr><th>Pipeline</th><td>${pipeline}</td></tr>
<tr><th>Version</th><td>${summary.getValue("meta", "pipeline_version")}</td></tr>
<tr><th>Last commit hash</th><td>${summary.getValue("meta", "last_commit_hash")}</td></tr>
<tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr>
<tr><th>Reference</th><td>${summary.getValue(pipeline, "settings", "reference", "species")} - ${summary.getValue(pipeline, "settings", "reference", "name")}</td></tr>
<tr><th>Number of samples</th><td>${summary.samples.size}</td></tr>
</tbody>
</table>
<br/>
<div class="row">
<div class="col-md-1"></div>
<div class="col-md-10">
<p>
In this web document you can find your <em><strong>${pipeline}</strong></em> pipeline report.
Different categories of data can be found in the left-side menu.
Statistics per sample and library can be accessed through the top-level menu.
Furthermore, you can view all versions of software tools used by selecting <em><a href="./Versions/index.html">Versions</a></em> from the top menu.
</p>
<p>
<small>Brought to you by <a href="https://sasc.lumc.nl" target="_blank"><abbr
title="Sequencing Analysis Support Core">SASC</abbr></a> and <a
href="https://www.lumc.nl/org/klinische-genetica/" target="_blank"><abbr title="Clinical Genetics LUMC">KG</abbr></a>,
LUMC.
</small>
</p>
</div>
<div class="col-md-1"></div>
</div>
\ No newline at end of file
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.tinycap
import java.io.File
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationGff, AnnotationGtf, AnnotationRefFlat }
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
import nl.lumc.sasc.biopet.pipelines.gentrap.measures.{ BaseCounts, FragmentsPerGene }
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
import nl.lumc.sasc.biopet.pipelines.tinycap.measures.FragmentsPerSmallRna
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import picard.analysis.directed.RnaSeqMetricsCollector.StrandSpecificity
/**
 * TinyCap pipeline: a multi-sample mapping pipeline that additionally feeds every
 * sample's pre-processed BAM file into a set of expression measures
 * (fragments per gene, fragments per small RNA and base counts).
 *
 * Created by pjvan_thof on 12/29/15.
 * Design based on work from Henk Buermans (e-Mir)
 * Implementation by wyleung started 19/01/16
 *
 * @param root parent configurable this pipeline is nested under (null when run standalone)
 */
class TinyCap(val root: Configurable) extends QScript
  with MultisampleMappingTrait
  with AnnotationRefFlat
  with AnnotationGff
  with AnnotationGtf
  with Reference {
  qscript =>

  /** No-argument constructor; builds the pipeline without a parent configurable. */
  def this() = this(null)

  /** Value of the "annotate_sam" config key (false when unset); not read elsewhere in this class. */
  var annotateSam: Boolean = config("annotate_sam", default = false)

  /** Pipeline-specific default settings, grouped per tool / sub-pipeline. */
  override def defaults = Map(
    "igvtoolscount" -> Map(
      "strands" -> "reads",
      "includeDuplicates" -> true
    ),
    "merge_strategy" -> "preprocessmergesam",
    "keep_merged_files" -> true,
    "mapping" -> Map(
      "aligner" -> "bowtie",
      "generate_wig" -> true,
      "skip_markduplicates" -> true
    ),
    "bammetrics" -> Map(
      "wgs_metrics" -> false,
      "rna_metrics" -> false,
      "collectrnaseqmetrics" -> Map(
        "strand_specificity" -> StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString
      )
    ),
    "bowtie" -> Map(
      "chunkmbs" -> 256,
      "seedmms" -> 3,
      "seedlen" -> 25,
      "k" -> 5,
      "best" -> true
    ),
    "sickle" -> Map(
      "lengthThreshold" -> 15
    ),
    "fastqc" -> Map(
      "sensitiveAdapterSearch" -> true
    ),
    "cutadapt" -> Map(
      "error_rate" -> 0.2,
      "minimum_length" -> 15,
      "q" -> 30,
      "default_clip_mode" -> "both",
      "times" -> 2
    )
  )

  // The measures are lazy so that each one is only constructed on first use.
  lazy val fragmentsPerGene = new FragmentsPerGene(this)
  lazy val fragmentsPerSmallRna = new FragmentsPerSmallRna(this)
  lazy val baseCounts = new BaseCounts(this)

  /** All expression measures that are run by this pipeline. */
  def executedMeasures = (fragmentsPerGene :: fragmentsPerSmallRna :: baseCounts :: Nil)

  /** Runs the inherited init and then gives every measure its own directory under "expression_measures". */
  override def init = {
    super.init()
    for (measure <- executedMeasures) {
      measure.outputDir = new File(outputDir, "expression_measures" + File.separator + measure.name)
    }
  }

  /** Creates the TinyCap-specific sample for the given sample id. */
  override def makeSample(id: String) = new Sample(id)

  /** Sample that registers its pre-processed BAM file (when there is one) with every measure. */
  class Sample(sampleId: String) extends super.Sample(sampleId) {
    override def addJobs(): Unit = {
      super.addJobs()
      for (bam <- preProcessBam; measure <- executedMeasures) {
        measure.addBamfile(sampleId, bam)
      }
    }
  }

  /** Location of the summary json of this pipeline. */
  override def summaryFile = new File(outputDir, "tinycap.summary.json")

  /** Inherited summary files extended with the three annotation files used by this pipeline. */
  override def summaryFiles: Map[String, File] = {
    val annotationFiles: Map[String, File] = Map(
      "annotation_refflat" -> annotationRefFlat(),
      "annotationGtf" -> annotationGtf,
      "annotationGff" -> annotationGff
    )
    super.summaryFiles ++ annotationFiles
  }

  /** Report builder writing into the "report" directory, based on this pipeline's summary file. */
  override def reportClass: Option[ReportBuilderExtension] = {
    val tinyCapReport = new TinyCapReport(this)
    tinyCapReport.outputDir = new File(outputDir, "report")
    tinyCapReport.summaryFile = summaryFile
    Some(tinyCapReport)
  }

  /** Adds the inherited multi-sample jobs, followed by every expression measure. */
  override def addMultiSampleJobs = {
    super.addMultiSampleJobs
    executedMeasures.foreach(add)
  }
}
/**
 * Companion object of the TinyCap pipeline.
 * NOTE(review): presumably PipelineCommand supplies the command-line entry point - confirm against PipelineCommand.
 */
object TinyCap extends PipelineCommand
\ No newline at end of file
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.tinycap
import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection }
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingReportTrait
import nl.lumc.sasc.biopet.utils.config.Configurable
/**
 * Report extension for the TinyCap pipeline; all report content is defined by
 * the [[TinyCapReport]] companion object.
 *
 * Created by wyleung on 4-2-16.
 *
 * @param root parent configurable this extension is nested under
 */
class TinyCapReport(val root: Configurable) extends ReportBuilderExtension {
  /** The builder used to generate the report. */
  def builder: TinyCapReport.type = TinyCapReport
}
/** Report definition for TinyCap: the multi-sample mapping report plus one plot section per expression measure. */
object TinyCapReport extends MultisampleMappingReportTrait {
  /** Name of the report */
  def reportName = "TinyCap Report"

  /** Name of the pipeline as passed to the report templates */
  override def pipelineName = "tinycap"

  /** Front section for the report */
  override def frontSection: ReportSection = ReportSection("/nl/lumc/sasc/biopet/pipelines/tinycap/tinycapFront.ssp")

  /** Extra sections: a heatmap plot page for the gene-level and the microRNA-level fragment counts. */
  override def additionalSections = {
    // Both sections render the same Gentrap template; only the measure name and the
    // summary key of the heatmap file differ.
    def plotSection(measure: String, heatmapKey: String) =
      ReportSection("/nl/lumc/sasc/biopet/pipelines/gentrap/measure_plotreport.ssp",
        Map("pipelineName" -> pipelineName,
          "plotName" -> measure,
          "plotPath" -> summary.getValue(measure, "files", "pipeline", heatmapKey, "path")
        ))

    List(
      "Fragments per gene" -> plotSection("fragmentspergene", "fragments_per_gene_heatmap"),
      "Fragments per microRNA" -> plotSection("fragmentspersmallrna", "fragments_per_smallrna_heatmap")
    )
  }
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.tinycap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationGff
import nl.lumc.sasc.biopet.extensions.HtseqCount
import nl.lumc.sasc.biopet.pipelines.gentrap.measures.Measurement
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
 * Expression measure that runs one HtseqCount job per registered BAM file against the
 * GFF annotation, merges the per-sample counts into one table and plots a heatmap of it.
 *
 * Created by wyleung on 11-2-16.
 *
 * @param root parent configurable this measure is nested under
 */
class FragmentsPerSmallRna(val root: Configurable) extends QScript with Measurement with AnnotationGff {

  /** Arguments for merging the per-sample count files into a single table. */
  def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")

  /** Merged count table covering all samples. */
  def mergedTable = new File(outputDir, s"$name.fragments_per_smallrna.tsv")

  /** Heatmap image generated from the merged table. */
  def heatmap = new File(outputDir, s"$name.fragments_per_smallrna.png")

  /** Pipeline itself */
  def biopetScript(): Unit = {
    // One stranded counting job per sample, restricted to features of type "miRNA"
    // and reported by their "Name" attribute.
    val countJobs = bamFiles.map {
      case (sampleId, bamFile) =>
        val htseq = new HtseqCount(this)
        htseq.inputAlignment = bamFile
        htseq.inputAnnotation = annotationGff
        htseq.format = Option("bam")
        htseq.stranded = Option("yes")
        htseq.featuretype = Option("miRNA")
        htseq.idattr = Option("Name")
        htseq.output = new File(outputDir, s"$sampleId.$name.counts")
        add(htseq)
        sampleId -> htseq
    }

    val countFiles = countJobs.values.map(_.output).toList
    addMergeTableJob(countFiles, mergedTable, "fragments_per_smallrna", s".$name.counts")
    addHeatmapJob(mergedTable, heatmap, "fragments_per_smallrna")
    addSummaryJobs()
  }
}
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
/**
* Created by wyleung on 11-2-16.
*/
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.tinycap
import java.io.File
import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.HtseqCount
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ AfterClass, DataProvider, Test }
/**
 * Tests that the TinyCap pipeline can be scripted (dry, without executing any job)
 * from a minimal configuration and that it produces the expected HtseqCount jobs.
 */
class TinyCapTest extends TestNGSuite with Matchers {

  /**
   * Builds a TinyCap pipeline whose global config is the given map.
   *
   * @param map configuration values to run the pipeline with
   * @return pipeline with run name "test" that has not been scripted yet
   */
  def initPipeline(map: Map[String, Any]): TinyCap = {
    new TinyCap() {
      override def configNamespace = "tinycap"
      override def globalConfig = new Config(map)
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

  /** Parameter sets for [[testTinyCap]]: a dummy string and whether sample1 is configured. */
  @DataProvider(name = "tinyCapOptions")
  def tinyCapOptions = {
    val bool = Array(true)
    for (
      s1 <- bool
    ) yield Array("", s1)
  }

  /**
   * Scripts the pipeline and checks the generated jobs.
   *
   * @param dummy unused placeholder argument from the data provider
   * @param sample1 whether the sample1 configuration is merged into the base config
   */
  @Test(dataProvider = "tinyCapOptions")
  def testTinyCap(dummy: String, sample1: Boolean): Unit = {
    val map: Map[String, Any] =
      if (sample1) ConfigUtils.mergeMaps(TinyCapTest.sample1, TinyCapTest.config)
      else TinyCapTest.config

    if (sample1) {
      val pipeline = initPipeline(map)
      pipeline.script()
      // expect 2 instances of HtSeqCount, one for mirna.gff other for transcripts.gtf
      pipeline.functions.count(_.isInstanceOf[HtseqCount]) shouldBe 2
    } else {
      // When no samples are configured scripting must fail. The two paths are mutually
      // exclusive: scripting a second time after this check would throw the very
      // exception that was just intercepted and fail the test.
      intercept[IllegalArgumentException] {
        initPipeline(map).script()
      }
    }
  }

  // remove temporary run directory after all tests in the class have been run
  @AfterClass def removeTempOutputDir(): Unit = {
    FileUtils.deleteDirectory(TinyCapTest.outputDir)
  }
}
/**
 * Shared fixtures for TinyCapTest: a temporary output directory filled with empty
 * placeholder input files, plus the configuration maps that point at them.
 */
object TinyCapTest {
  /** Temporary run directory; removed again by the test class after all tests have run. */
  val outputDir = Files.createTempDir()

  private val inputDir = new File(outputDir, "input")
  inputDir.mkdirs()

  // Empty placeholder input files.
  val r1 = new File(inputDir, "R1.fq.gz")
  Files.touch(r1)
  val bam = new File(inputDir, "bamfile.bam")
  Files.touch(bam)

  // Empty placeholder reference files.
  val referenceFasta = new File(outputDir, "ref.fa")
  Files.touch(referenceFasta)
  val referenceFastaDict = new File(outputDir, "ref.dict")
  Files.touch(referenceFastaDict)
  val bowtieIndex = new File(outputDir, "ref.1.ebwt")
  Files.touch(bowtieIndex)

  // Empty placeholder annotation files.
  val annotationGFF = new File(outputDir, "annot.gff")
  val annotationGTF = new File(outputDir, "annot.gtf")
  val annotationRefflat = new File(outputDir, "annot.refflat")
  Files.touch(annotationGFF)
  Files.touch(annotationGTF)
  Files.touch(annotationRefflat)

  /** Base configuration: file locations plus a dummy executable ("test") for every tool. */
  val config = Map(
    "output_dir" -> outputDir,
    "reference_fasta" -> (referenceFasta.getAbsolutePath),
    "bowtie_index" -> (bowtieIndex.getAbsolutePath),
    "annotation_gff" -> annotationGFF,
    "annotation_gtf" -> annotationGTF,
    "annotation_refflat" -> annotationRefflat,
    "md5sum" -> Map("exe" -> "test"),
    "fastqc" -> Map("exe" -> "test"),
    "seqtk" -> Map("exe" -> "test"),
    "sickle" -> Map("exe" -> "test"),
    "cutadapt" -> Map("exe" -> "test"),
    "bowtie" -> Map("exe" -> "test"),
    "htseqcount" -> Map("exe" -> "test"),
    "igvtools" -> Map("exe" -> "test"),
    "wigtobigwig" -> Map("exe" -> "test")
  )

  /** Sample configuration: one sample ("sample1") with one library ("lib1") that only has an R1 file. */
  val sample1 = Map(
    "samples" -> Map(
      "sample1" -> Map(
        "libraries" -> Map(
          "lib1" -> Map(
            "R1" -> r1.getAbsolutePath
          )
        )
      )
    )
  )
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment