diff --git a/docs/general/requirements.md b/docs/general/requirements.md new file mode 100644 index 0000000000000000000000000000000000000000..0105f7ccc29dcbd5def4b6b49a6bb1235031d858 --- /dev/null +++ b/docs/general/requirements.md @@ -0,0 +1,17 @@ +### System Requirements + +Biopet is built on top of GATK Queue, which requires having `java` installed on the analysis machine(s). + +For end-users: + + * [Java 7 JVM](http://www.oracle.com/technetwork/java/javase/downloads/index.html) or [OpenJDK 7](http://openjdk.java.net/install/) + * [Cran R 2.15.3](http://cran.r-project.org/) + +For developers: + + * [OpenJDK 7](http://openjdk.java.net/install/) + * Minimum of 4 GB RAM {todo: provide more accurate estimation on building} + * Maven 3 + * Compiled and installed version 3.4 of [GATK + Queue](https://github.com/broadgsa/gatk-protected/) in your maven repository. + * IntelliJ or Netbeans 8.0 for development + diff --git a/mkdocs.yml b/mkdocs.yml index b5cb8fdb5a2eb8564edf286f868faf57fc58ceca..87a79146a31b767e9a6c551c76df62a168853204 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -3,6 +3,7 @@ pages: - Home: 'index.md' - General: - Config: 'general/config.md' + - Requirements: 'general/requirements.md' - Pipelines: - Basty: 'pipelines/basty.md' - Bam2Wig: 'pipelines/bam2wig.md' diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala index 6cdea7db15affd6cf871f393d7de2cf0be3cc2fb..088123a760b7cc9f14821a1a7f0d7aa37ed27d99 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala @@ -58,34 +58,37 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Refe var strata: Boolean = config("strata", default = false) var maqerr: Option[Int] = config("maqerr") var maxins: Option[Int] = config("maxins") + var 
largeIndex: Boolean = config("large-index", default = false) override def beforeGraph() { super.beforeGraph() if (reference == null) reference = referenceFasta() + val basename = reference.getName.stripSuffix(".fasta").stripSuffix(".fa") + if (Option(reference.getAbsoluteFile.getParentFile.list()).exists(_.exists(name => name.startsWith(basename) && name.endsWith(".ebwtl")))) + largeIndex = config("large-index", default = true) } /** return commandline to execute */ - def cmdLine = { - required(executable) + - optional("--threads", threads) + - conditional(sam, "--sam") + - conditional(best, "--best") + - conditional(strata, "--strata") + - optional("--sam-RG", sam_RG) + - optional("--seedlen", seedlen) + - optional("--seedmms", seedmms) + - optional("-k", k) + - optional("-m", m) + - optional("--maxbts", maxbts) + - optional("--maqerr", maqerr) + - optional("--maxins", maxins) + - required(reference) + - (R2 match { - case Some(r2) => - required("-1", R1) + - optional("-2", r2) - case _ => required(R1) - }) + - " > " + required(output) - } + def cmdLine = required(executable) + + optional("--threads", threads) + + conditional(sam, "--sam") + + conditional(largeIndex, "--large-index") + + conditional(best, "--best") + + conditional(strata, "--strata") + + optional("--sam-RG", sam_RG) + + optional("--seedlen", seedlen) + + optional("--seedmms", seedmms) + + optional("-k", k) + + optional("-m", m) + + optional("--maxbts", maxbts) + + optional("--maqerr", maqerr) + + optional("--maxins", maxins) + + required(reference.getAbsolutePath.stripSuffix(".fasta").stripSuffix(".fa")) + + (R2 match { + case Some(r2) => + required("-1", R1) + + optional("-2", r2) + case _ => required(R1) + }) + + " > " + required(output) } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala index e908a72704edc0d7acc5981c80e5a5de43a1f50d..2ecea3868842bec8f8c9f34a128218becf08ecbc 
100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala @@ -72,7 +72,7 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction with Refere if (reference == null) reference = referenceFasta() genomeDir = config("genomeDir", new File(reference.getAbsoluteFile.getParent, "star")) if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "." - val prefix = if (outFileNamePrefix != null) outputDir + outFileNamePrefix else outputDir + val prefix = if (outFileNamePrefix != null) outputDir + File.separator + outFileNamePrefix else outputDir + File.separator if (runmode == null) { outputSam = new File(prefix + "Aligned.out.sam") outputTab = new File(prefix + "SJ.out.tab") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala index 82332de297f12df794fa3d770c3dcc22da04f57a..d32eb27a3d782189bdbd623eee3d17f375b2d139 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala @@ -17,14 +17,14 @@ package nl.lumc.sasc.biopet.extensions import java.io.File -import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction } import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } /** * Extension for Tophat */ -class Tophat(val root: Configurable) extends BiopetCommandLineFunction { +class Tophat(val root: Configurable) extends BiopetCommandLineFunction with Reference { executable = config("exe", default = "tophat", freeVar = false) @@ -264,6 +264,16 @@ class Tophat(val root: Configurable) extends BiopetCommandLineFunction { var 
rg_platform: Option[String] = config("rg_platform") + override def beforeGraph: Unit = { + super.beforeGraph + if (bowtie1 && !new File(bowtie_index).getParentFile.list().toList + .filter(_.startsWith(new File(bowtie_index).getName)).exists(_.endsWith(".ebwt"))) + throw new IllegalArgumentException("No bowtie1 index found for tophat") + else if (!bowtie1 && !new File(bowtie_index).getParentFile.list().toList + .filter(_.startsWith(new File(bowtie_index).getName)).exists(_.endsWith(".bt2"))) + throw new IllegalArgumentException("No bowtie2 index found for tophat") + } + def cmdLine: String = required(executable) + optional("-o", output_dir) + conditional(bowtie1, "--bowtie1") + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala index 19122634b79337d0ef67642f85700b8a75c0ad5a..f347ad3574fc6b13ab2be8abb6bdb4b1f64057fb 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala @@ -45,6 +45,14 @@ class MergeSamFiles(val root: Configurable) extends Picard { @Argument(doc = "COMMENT", required = false) var comment: Option[String] = config("comment") + @Output(doc = "Bam Index", required = true) + private var outputIndex: File = _ + + override def beforeGraph() { + super.beforeGraph() + if (createIndex) outputIndex = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai") + } + /** Returns command to execute */ override def commandLine = super.commandLine + repeat("INPUT=", input, spaceSeparated = false) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala index 
46cd2b40cd8645a432aee54081f54e9dd0ec3351..0f0e11c62ac8f935a064b5b3b042d98527e48934 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala @@ -19,6 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.utils.ConfigUtils._ +import scala.collection.mutable import scala.io.Source @@ -45,21 +46,30 @@ object SamplesTsvToJson extends ToolCommand { val header = lines.head.split("\t") val sampleColumn = header.indexOf("sample") val libraryColumn = header.indexOf("library") - if (sampleColumn == -1) throw new IllegalStateException("sample column does not exist in: " + inputFile) + if (sampleColumn == -1) throw new IllegalStateException("Sample column does not exist in: " + inputFile) + + val sampleLibCache: mutable.Set[(String, Option[String])] = mutable.Set() val librariesValues: List[Map[String, Any]] = for (tsvLine <- lines.tail) yield { val values = tsvLine.split("\t") + require(header.length == values.length, "Number of columns is not the same as the header") val sample = values(sampleColumn) - val library = if (libraryColumn != -1) values(libraryColumn) else null + val library = if (libraryColumn != -1) Some(values(libraryColumn)) else None + + //FIXME: this is a workaround, should be removed after fixing #180 + if (sample.head.isDigit || library.exists(_.head.isDigit)) + throw new IllegalStateException("Sample or library may not start with a number") + + if (sampleLibCache.contains((sample, library))) + throw new IllegalStateException(s"Combination of $sample and $library is found multiple times") + else sampleLibCache.add((sample, library)) val valuesMap = (for ( t <- 0 until values.size if !values(t).isEmpty && t != sampleColumn && t != libraryColumn ) yield header(t) -> values(t)).toMap - val map: Map[String, Any] = if (library != null) { - Map("samples" -> Map(sample -> 
Map("libraries" -> Map(library -> valuesMap)))) - } else { - Map("samples" -> Map(sample -> valuesMap)) + library match { + case Some(lib) => Map("samples" -> Map(sample -> Map("libraries" -> Map(lib -> valuesMap)))) + case _ => Map("samples" -> Map(sample -> valuesMap)) } - map } librariesValues.foldLeft(Map[String, Any]())((acc, kv) => mergeMaps(acc, kv)) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala index 8543b7706670ef40d9a877f27a0bf1b1ed3d77a9..733ebc00d950333811e8101ddb339505818770b5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VepNormalizer.scala @@ -47,7 +47,7 @@ class VepNormalizer(val root: Configurable) extends ToolCommandFuntion { @Output(doc = "Output VCF", shortName = "OutputFile", required = true) var outputVcf: File = null - var mode: String = config("mode", default = "explode") + var mode: String = config("mode", default = "standard") var doNotRemove: Boolean = config("donotremove", default = false) override def defaultCoreMemory = 1.0 diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala index c855736f77111a7695b812ebf9af3b3b089a6887..ab703e5b5e44a7cc0762e8cff51b1f3b23c66ab9 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingReport.scala @@ -35,18 +35,22 @@ object MappingReport extends ReportBuilder { /** Root page for single BamMetrcis report */ def indexPage = { - val bamMetricsPage = BammetricsReport.bamMetricsPage(summary, sampleId, libId) - ReportPage(List("QC" -> FlexiprepReport.flexiprepPage) ::: 
bamMetricsPage.subPages ::: List( - "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp" - )), Map()), - "Files" -> ReportPage(List(), List( - "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), - "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), - "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)) - ), Map()) - ), List( + val skipFlexiprep = summary.getValue(sampleId, libId, "mapping", "settings", "skip_flexiprep").getOrElse(false) == true + val bamMetricsPage = if (summary.getValue(sampleId, libId, "mapping", "settings", "skip_metrics").getOrElse(false) != true) { + Some(BammetricsReport.bamMetricsPage(summary, sampleId, libId)) + } else None + ReportPage((if (skipFlexiprep) Nil else List("QC" -> FlexiprepReport.flexiprepPage)) ::: + bamMetricsPage.map(_.subPages).getOrElse(Nil) ::: List( + "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp" + )), Map()), + "Files" -> ReportPage(List(), (if (skipFlexiprep) Nil else List( + "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), + "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"))) ::: + List("Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)) + ), Map()) + ), List( "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/mappingFront.ssp") - ) ::: bamMetricsPage.sections, + ) ::: bamMetricsPage.map(_.sections).getOrElse(Nil), Map() ) } diff --git a/public/pom.xml b/public/pom.xml index 93acfae5e8cdc28359579b70eac5cddc2643f161..a59a97680ac2b080f8a6311d18a56af24f35b6aa 100644 --- a/public/pom.xml +++ 
b/public/pom.xml @@ -72,6 +72,7 @@ <artifactId>maven-surefire-plugin</artifactId> <version>2.18.1</version> <configuration> + <forkCount>1C</forkCount> <workingDirectory>${project.build.directory}</workingDirectory> </configuration> </plugin>