diff --git a/biopet-aggregate/copy-src.sh b/biopet-aggregate/copy-src.sh deleted file mode 100755 index fdfefa48461afe7f1f8e1b4aad226ab818caee25..0000000000000000000000000000000000000000 --- a/biopet-aggregate/copy-src.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -DIR=`readlink -f \`dirname $0\`` - -cp -r $DIR/../*/*/src/* $DIR/src - diff --git a/biopet-aggregate/pom.xml b/biopet-aggregate/pom.xml index 04f8d08e04d528a2fa59a547dcd365a1fcd68461..e29997cc6d6e3cd65e557393d3153d63bba30468 100644 --- a/biopet-aggregate/pom.xml +++ b/biopet-aggregate/pom.xml @@ -2,45 +2,42 @@ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <parent> - <artifactId>BiopetRoot</artifactId> - <groupId>nl.lumc.sasc</groupId> - <version>0.6.0-SNAPSHOT</version> - </parent> + <modelVersion>4.0.0</modelVersion> <artifactId>BiopetAggregate</artifactId> + <packaging>pom</packaging> - <dependencies> - <dependency> - <groupId>org.testng</groupId> - <artifactId>testng</artifactId> - <version>6.8</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.mockito</groupId> - <artifactId>mockito-all</artifactId> - <version>1.9.5</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.scalatest</groupId> - <artifactId>scalatest_2.10</artifactId> - <version>2.2.1</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetProtectedPackage</artifactId> - <version>0.6.0-SNAPSHOT</version> - </dependency> - <dependency> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - <version>18.0</version> - </dependency> + <parent> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet</artifactId> + <version>0.6.0-SNAPSHOT</version> + <relativePath>../public</relativePath> + </parent> - </dependencies> + <modules> + <module>../public/biopet-core</module> + <module>../public/biopet-public-package</module> + <module>../public/bammetrics</module> + <module>../public/flexiprep</module> + <module>../public/gentrap</module> + <module>../public/mapping</module> + <module>../public/sage</module> + <module>../public/kopisu</module> + <module>../public/gears</module> + <module>../public/bam2wig</module> + <module>../public/carp</module> + <module>../public/toucan</module> + <module>../public/shiva</module> + <module>../public/basty</module> + <module>../public/biopet-utils</module> + <module>../public/biopet-tools</module> + <module>../public/biopet-tools-extensions</module> + <module>../public/biopet-extensions</module> + <module>../public/biopet-tools-package</module> + <module>../protected/biopet-gatk-extensions</module> + <module>../protected/biopet-gatk-pipelines</module> + <module>../protected/biopet-protected-package</module> + </modules> </project> \ No newline at end of file diff --git a/biopet-aggregate/rm-src.sh b/biopet-aggregate/rm-src.sh deleted file mode 100755 index f0a2e2b9307150913a9705bd237f226dc157157e..0000000000000000000000000000000000000000 --- a/biopet-aggregate/rm-src.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -DIR=`readlink -f \`dirname $0\`` - -rm -r $DIR/src/main $DIR/src/test - diff --git a/biopet-aggregate/src/.gitignore b/biopet-aggregate/src/.gitignore deleted file mode 100644 index 59e4d6049a3756e3d0c5414611e042fcc5c1bce6..0000000000000000000000000000000000000000 --- a/biopet-aggregate/src/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -main 
-test diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala index 60614ecc938bef58534c810423254fc56eab5dbc..30ec0d09721ee7d06c69da3fffa3681579b7cb19 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala @@ -8,10 +8,11 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.Output +import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral { + @Gather(enabled = false) @Output(required = false) protected var vcfIndex: File = _ diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala index 32cf797a0f77d488a5aa0c7fda32fcfaa422e038..868b6ed0a62f87cf169571debd16a125ce8e9fc9 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala @@ -8,9 +8,11 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.Output +import org.broadinstitute.gatk.utils.commandline.{ Gather, Output } class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral { + + @Gather(enabled = false) @Output protected var bamIndex: File = _ diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala index 620619bfea1ccdb745f814d2b801c100437d4c76..cff40a548b4545230010ae648ccf93d0081718d6 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala @@ -21,7 +21,7 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { def this() = this(null) /** Make variantcalling submodule, this with the gatk modes in there */ - override def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = { + override def makeVariantcalling(multisample: Boolean = false) = { if (multisample) new ShivaVariantcalling(qscript) { override def namePrefix = "multisample" override def configName = "shivavariantcalling" @@ -56,36 +56,40 @@ class Shiva(val root: Configurable) extends QScript with ShivaTrait { ("use_indel_realigner" -> useIndelRealigner) + ("use_base_recalibration" -> useBaseRecalibration) - /** This will adds preprocess steps, gatk indel realignment and base recalibration is included here */ - override def preProcess(input: File): Option[File] = { - if (!useIndelRealigner && !useBaseRecalibration) None - else { - val indelRealignFile = useIndelRealigner match { - case true => 
addIndelRealign(input, libDir, useBaseRecalibration || libraries.size > 1) - case false => input - } - - useBaseRecalibration match { - case true => Some(addBaseRecalibrator(indelRealignFile, libDir, libraries.size > 1)) - case false => Some(indelRealignFile) - } + override def preProcessBam = if (useIndelRealigner && useBaseRecalibration) + bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam")) + else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam")) + else if (useBaseRecalibration) bamFile.map(swapExt(libDir, _, ".bam", ".baserecal.bam")) + else bamFile + + override def addJobs(): Unit = { + super.addJobs() + if (useIndelRealigner && useBaseRecalibration) { + val file = addIndelRealign(bamFile.get, libDir, isIntermediate = true) + addBaseRecalibrator(file, libDir, libraries.size > 1) + } else if (useIndelRealigner) { + addIndelRealign(bamFile.get, libDir, libraries.size > 1) + } else if (useBaseRecalibration) { + addBaseRecalibrator(bamFile.get, libDir, libraries.size > 1) } } } + override def keepMergedFiles: Boolean = config("keep_merged_files", default = false) + override def summarySettings = super.summarySettings + ("use_indel_realigner" -> useIndelRealigner) lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true) - /** This methods will add double preprocess steps, with GATK indel realignment */ - override protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { - if (input.size <= 1) super.addDoublePreProcess(input) - else super.addDoublePreProcess(input, isIntermediate = useIndelRealigner).collect { - case file => - useIndelRealigner match { - case true => addIndelRealign(file, sampleDir, isIntermediate = false) - case false => file - } + override def preProcessBam = if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) { + bamFile.map(swapExt(sampleDir, _, ".bam", ".realign.bam")) + } else bamFile + + override def addJobs(): Unit = { + super.addJobs() + + if (useIndelRealigner && libraries.values.flatMap(_.preProcessBam).size > 1) { + addIndelRealign(bamFile.get, sampleDir, false) } } } diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp new file mode 100644 index 0000000000000000000000000000000000000000..9c055ad97492ba782453472a4d7528f92dc28ade --- /dev/null +++ b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp @@ -0,0 +1,93 @@ +#import(nl.lumc.sasc.biopet.utils.summary.Summary) +#import(nl.lumc.sasc.biopet.core.report.ReportPage) +#import(nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport) +#import(java.io.File) +#import(org.apache.commons.io.FileUtils) +<%@ var summary: Summary %> +<%@ var sampleId: Option[String] = None %> +<%@ var libId: Option[String] = None %> +<%@ var rootPath: String %> +<%@ var metricsTag: String = "bammetrics" %> +<%@ var sampleLevel: Boolean = false %> +<%@ var outputDir: File %> +<%@ var fields: List[String] = List("PF_ALIGNED_BASES", "MEDIAN_5PRIME_BIAS", "MEDIAN_3PRIME_BIAS", "MEDIAN_5PRIME_TO_3PRIME_BIAS")%> +<%@ var showPlot: Boolean = false %> +<%@ var showTable: Boolean = true %> +<%@ var showIntro: Boolean = true%> +#{ + val samples = sampleId match { + case Some(sample) => { + List(sample.toString) + } + case _ => summary.samples.toList + } +}# + +#if (showIntro) + <br/> + <div class="row"> + <div 
class="col-md-1"></div> + <div class="col-md-6"> + <p> + This Show the relative coverage for all transcripts. De data here is generated by picard CollectRnaMetrics + </p> + </div> + </div> +#end + +#if (showPlot) + #{ BammetricsReport.rnaHistogramPlot(outputDir, "rna", summary, !sampleLevel, sampleId = sampleId, libId = libId) }# + + <div class="panel-body"> + <img src="rna.png" class="img-responsive" /> + </div> + <div class="panel-footer"> + #if (showTable) + <button type="button" class="btn btn-info" data-toggle="collapse" data-target="#rnaTable">Hide table</button> + #else + <button type="button" class="btn btn-info" data-toggle="collapse" data-target="#rnaTable">Show table</button> + #end + <i class="glyphicon glyphicon-file"></i> <a href="rna.tsv">tsv file</a> + </div> +#end + +<div class="panel-body collapse #if (showTable)in#end" id="rnaTable"> +<!-- Table --> +<table class="table sortable-theme-bootstrap" data-sortable> + <thead><tr> + <th data-sorted="true" data-sorted-direction="ascending">Sample</th> + #if (!sampleLevel) <th>Library</th> #end + #for (field <- fields) + <th>${field.replaceAll("_", " ")}</th> + #end + </tr></thead> + <tbody> + #for (sample <- samples.toList.sorted) + #{ + val libs = (libId, sampleLevel) match { + case (_, true) => List("") + case (Some(libId), _) => List(libId.toString) + case _ => summary.libraries(sample).toList + } + }# + <tr><td rowspan="${libs.size}"><a href="${rootPath}Samples/${sample}/index.html">${sample}</a></td> + #for (libId <- libs) + #if (libs.head != libId) <tr> #end + #if (!sampleLevel) <td><a href="${rootPath}Samples/${sample}/Libraries/${libId}/index.html">${libId}</a></td> #end + #{ + val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List("bammetrics", "stats") + + val fieldValues = for (field <- fields) yield { + summary.getValue((prefixPath ::: List("rna", "metrics", field.toUpperCase)):_*).getOrElse(prefixPath ::: metricsTag :: Nil) + } + }# + #for (value <- fieldValues) + <td>${value}</td> + #end + </tr> + #end + #end + </tbody> +</table> + +</div> diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 61f4243261df710a4ef7c19780a7bcc13146e790..01fd32ab2aef9de5a55d8e726fd4334980888b32 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -39,7 +39,6 @@ class BamMetrics(val root: Configurable) extends QScript var inputBam: File = _ /** Settings for CollectRnaSeqMetrics */ - var rnaMetricsSettings: Map[String, String] = Map() var transcriptRefFlatFile: Option[File] = config("transcript_refflat") /** return location of summary file */ @@ -77,7 +76,7 @@ class BamMetrics(val root: Configurable) extends QScript /** Script to add jobs */ def biopetScript() { - add(SamtoolsFlagstat(this, inputBam, swapExt(outputDir, inputBam, ".bam", ".flagstat"))) + add(SamtoolsFlagstat(this, inputBam, outputDir)) val biopetFlagstat = BiopetFlagstat(this, inputBam, outputDir) add(biopetFlagstat) @@ -107,8 +106,6 @@ class BamMetrics(val root: Configurable) extends QScript rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics") rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf")) rnaMetrics.refFlat = transcriptRefFlatFile.get - 
rnaMetrics.ribosomalIntervals = rnaMetricsSettings.get("ribosomal_intervals").collect { case n => new File(n) } - rnaMetrics.strandSpecificity = rnaMetricsSettings.get("strand_specificity") add(rnaMetrics) addSummarizable(rnaMetrics, "rna") } diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala index 4da9ec9e5f878262e66473d559cc209580674992..10cecfb2d8c54ff18c78fc74385993bcf17312d7 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala @@ -57,9 +57,13 @@ object BammetricsReport extends ReportBuilder { sampleId: Option[String], libId: Option[String], metricsTag: String = "bammetrics") = { + + val wgsExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "wgs").isDefined + val rnaExecuted = summary.getValue(sampleId, libId, metricsTag, "stats", "rna").isDefined + val targets = ( - summary.getValue(sampleId, libId, "bammetrics", "settings", "amplicon_name"), - summary.getValue(sampleId, libId, "bammetrics", "settings", "roi_name") + summary.getValue(sampleId, libId, metricsTag, "settings", "amplicon_name"), + summary.getValue(sampleId, libId, metricsTag, "settings", "roi_name") ) match { case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString) case (_, Some(roi: List[_])) => roi.map(_.toString) @@ -74,9 +78,13 @@ object BammetricsReport extends ReportBuilder { Map())), List( "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), - "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)), - "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", Map("showPlot" -> true)) - ), + "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", Map("showPlot" -> true)) + ) ++ (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", + Map("showPlot" -> true))) + else Nil) ++ + (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp", + Map("showPlot" -> true))) + else Nil), Map("metricsTag" -> metricsTag) ) } @@ -321,4 +329,94 @@ object BammetricsReport extends ReportBuilder { plot.title = Some("Whole genome coverage") plot.runLocal() } + + /** + * Generate a line plot for rna coverage + * @param outputDir OutputDir for the tsv and png file + * @param prefix Prefix of the tsv and png file + * @param summary Summary class + * @param libraryLevel Default false, when set to true the plot will be based on library stats instead of sample stats + * @param sampleId By default all samples are selected, when a sample is given the plot is limited to that sample + */ + def rnaHistogramPlot(outputDir: File, + prefix: String, + summary: Summary, + libraryLevel: Boolean = false, + sampleId: Option[String] = None, + libId: Option[String] = None): Unit = { + val tsvFile = new File(outputDir, prefix + ".tsv") + val pngFile = new File(outputDir, prefix + ".png") + val tsvWriter = new PrintWriter(tsvFile) + if (libraryLevel) { + tsvWriter.println((for ( + sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample; + lib <- summary.libraries(sample) if libId.isEmpty || libId.get ==
lib + ) yield s"$sample-$lib") + .mkString("library\t", "\t", "")) + } else { + sampleId match { + case Some(sample) => tsvWriter.println("\t" + sample) + case _ => tsvWriter.println(summary.samples.mkString("Sample\t", "\t", "")) + } + } + + var map: Map[Int, Map[String, Double]] = Map() + + def fill(sample: String, lib: Option[String]): Unit = { + + val insertSize = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "normalized_position"), + summary, Some(sample), lib).value.getOrElse(List()) + val counts = new SummaryValue(List("bammetrics", "stats", "rna", "histogram", "All_Reads.normalized_coverage"), + summary, Some(sample), lib).value.getOrElse(List()) + + (insertSize, counts) match { + case (l: List[_], l2: List[_]) => + l.zip(l2).foreach(i => { + val insertSize = i._1.toString.toInt + val count = i._2.toString.toDouble + val old = map.getOrElse(insertSize, Map()) + if (libraryLevel) map += insertSize -> (old + ((s"$sample-" + lib.get) -> count)) + else map += insertSize -> (old + (sample -> count)) + }) + case _ => throw new IllegalStateException("Must be a list") + } + } + + if (libraryLevel) { + for ( + sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample; + lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib + ) fill(sample, Some(lib)) + } else if (sampleId.isDefined) fill(sampleId.get, None) + else summary.samples.foreach(fill(_, None)) + + for ((insertSize, counts) <- map) { + tsvWriter.print(insertSize) + if (libraryLevel) { + for ( + sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample; + lib <- summary.libraries(sample) if libId.isEmpty || libId.get == lib + ) { + tsvWriter.print("\t" + counts.getOrElse(s"$sample-$lib", "0")) + } + } else { + for (sample <- summary.samples if sampleId.isEmpty || sampleId.get == sample) { + tsvWriter.print("\t" + counts.getOrElse(sample, "0")) + } + } + tsvWriter.println() + } + + tsvWriter.close() + + val plot = new LinePlot(null) + plot.input = tsvFile + plot.output = pngFile + plot.xlabel = Some("Relative position") + plot.ylabel = Some("Coverage") + plot.width = Some(1200) + plot.removeZero = true + plot.title = Some("Rna coverage") + plot.runLocal() + } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index fa4f40645b49efee9ba5fb9c42a05c1c10006b0d..6a6039bcd28675e63444a810843bc799941b71bb 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -17,10 +17,10 @@ package nl.lumc.sasc.biopet.core import java.io.File +import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript } import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils } import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.utils.commandline.Argument /** This trait creates a structured way of use multisample pipelines */ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => @@ -31,7 +31,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => require(globalConfig.map.contains("samples"), "No Samples found in config") /** Sample class with basic functions build in */ - abstract class AbstractSample(val sampleId: String) extends Summarizable { + abstract class AbstractSample(val sampleId: String) extends Summarizable {
sample => /** Overrules config of qscript with default sample */ val config = new ConfigFunctions(defaultSample = sampleId) @@ -39,7 +39,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => def summarySettings: Map[String, Any] = Map() /** Library class with basic functions build in */ - abstract class AbstractLibrary(val libId: String) extends Summarizable { + abstract class AbstractLibrary(val libId: String) extends Summarizable { lib => /** Overrules config of qscript with default sample and default library */ val config = new ConfigFunctions(defaultSample = sampleId, defaultLibrary = libId) @@ -64,11 +64,22 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => } /** Creates a library file with given suffix */ - def createFile(suffix: String): File = new File(libDir, sampleId + "-" + libId + suffix) + def createFile(suffix: String): File = new File(libDir, s"$sampleId-$libId.$suffix") /** Returns library directory */ def libDir = new File(sampleDir, "lib_" + libId) + lazy val libTags: Map[String, Any] = + config("tags", default = Map(), freeVar = false, submodule = libId, path = List("samples", sampleId, "libraries")) + + def sampleId = sample.sampleId + + lazy val libGroups: List[String] = libTags.get("groups") match { + case Some(g: List[_]) => g.map(_.toString) + case Some(g: String) => List(g) + case _ => Nil + } + /** Function that add library jobs */ protected def addJobs() } @@ -79,6 +90,49 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => /** Stores all libraries */ val libraries: Map[String, Library] = libIds.map(id => id -> makeLibrary(id)).toMap + lazy val sampleTags: Map[String, Any] = + config("tags", default = Map(), freeVar = false, submodule = sampleId, path = List("samples")) + + lazy val gender = { + val g: Option[String] = sampleTags.get("gender").map(_.toString) + g.map(_.toLowerCase) match { + case Some("male") => Gender.Male + case Some("female") => Gender.Female + case Some(s) => + logger.warn(s"Could not convert '$s' to a gender") + Gender.Unknown + case _ => Gender.Unknown + } + } + + lazy val father = { + val g: Option[String] = sampleTags.get("father").map(_.toString) + g.foreach { father => + if (sampleId == father) Logging.addError(s"Father for $sampleId can not be itself") + if (samples.contains(father)) { if (samples(father).gender != Gender.Male) + Logging.addError(s"Father of $sampleId is not a male") + } else logger.warn(s"For sample '$sampleId' father '$father' is not found in config") + } + g + } + + lazy val mother = { + val g: Option[String] = sampleTags.get("mother").map(_.toString) + g.foreach { mother => + if (sampleId == mother) Logging.addError(s"Mother for $sampleId can not be itself") + if (samples.contains(mother)) { if (samples(mother).gender != Gender.Female) + Logging.addError(s"Mother of $sampleId is not a female") + } else logger.warn(s"For sample '$sampleId' mother '$mother' is not found in config") + } + g + } + + lazy val sampleGroups: List[String] = sampleTags.get("groups") match { + case Some(g: List[_]) => g.map(_.toString) + case Some(g: String) => List(g) + case _ => Nil + } + /** * Factory method for Library class * @param id SampleId */ @@ -117,7 +171,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => } /** Creates a sample file with given suffix */ - def createFile(suffix: String) = new File(sampleDir, sampleId + suffix) + def createFile(suffix: String) = new File(sampleDir, s"$sampleId.$suffix") /** Returns sample directory */ def sampleDir = new
File(outputDir, "samples" + File.separator + sampleId) @@ -180,3 +234,10 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => sample ::: lib ::: super.configFullPath } } + +object MultiSampleQScript { + object Gender extends Enumeration { + val Male, Female, Unknown = Value + } + +} \ No newline at end of file diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala index 152230c8a41c18e28b69e3c5fb9d10febe8eec68..11ebe1c79158bed3acd8bd714476615851ee4566 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala @@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.core import java.io.File import htsjdk.samtools.reference.IndexedFastaSequenceFile +import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, Summarizable } import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config.Configurable @@ -63,7 +64,7 @@ trait Reference extends Configurable { protected def faiRequired = false /** When set override this on true the pipeline with raise an exception when dict index is not found */ - protected def dictRequired = false + protected def dictRequired = this.isInstanceOf[Summarizable] || this.isInstanceOf[SummaryQScript] /** Returns the fasta file */ def referenceFasta(): File = { @@ -101,12 +102,11 @@ trait Reference extends Configurable { def checkFasta(file: File): Unit = { if (!Reference.checked.contains(file)) { if (!file.exists()) Logging.addError(s"Reference not found: $file, species: $referenceSpecies, name: $referenceName, configValue: " + config("reference_fasta")) - - if (dictRequired) Reference.requireDict(file) - if (faiRequired) Reference.requireFai(file) - Reference.checked += file } + + if (dictRequired) Reference.requireDict(file) + if (faiRequired) Reference.requireFai(file) } } @@ -121,10 +121,13 @@ object Reference { */ def requireFai(fastaFile: File): Unit = { val fai = new File(fastaFile.getAbsolutePath + ".fai") - if (fai.exists()) { - if (!IndexedFastaSequenceFile.canCreateIndexedFastaReader(fastaFile)) - Logging.addError(s"Index of reference cannot be loaded, reference: $fastaFile") - } else Logging.addError("Reference is missing a fai file") + if (!checked.contains(fai)) { + checked += fai + if (fai.exists()) { + if (!IndexedFastaSequenceFile.canCreateIndexedFastaReader(fastaFile)) + Logging.addError(s"Index of reference cannot be loaded, reference: $fastaFile") + } else Logging.addError("Reference is missing a fai file") + } } /** @@ -136,6 +139,9 @@ object Reference { .stripSuffix(".fna") .stripSuffix(".fa") .stripSuffix(".fasta") + ".dict") - if (!dict.exists()) Logging.addError("Reference is missing a dict file") + if (!checked.contains(dict)) { + checked += dict + if (!dict.exists()) Logging.addError("Reference is missing a dict file") + } } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala index a3317faf604a9ae80c02ad0c3d9751fbc65849b9..e387200afcfce1687c2087753ddcb1ec16985774 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/SampleLibraryTag.scala @@ -24,6 +24,9 @@ import org.broadinstitute.gatk.utils.commandline.Argument * @author Peter van 't Hof */ trait SampleLibraryTag 
extends Configurable { + + //FIXME: not possible to have required sample / lib + @Argument(doc = "Sample ID", shortName = "sample", required = false) var sampleId: Option[String] = root match { case tag: SampleLibraryTag => tag.sampleId diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala index 232954b7d8be7bf99cd4b354fb72d570ed1ebddc..108ee694e54fc4dd126dfc82f90adb7470602cde 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/WriteDependencies.scala @@ -80,7 +80,7 @@ object WriteDependencies extends Logging with Configurable { "intermediate" -> isIntermediate, "output_jobs" -> outputJobNames, "input_jobs" -> inputJobNames, - "exist_at_start" -> fileExist, + "exists_at_start" -> fileExist, "pipeline_input" -> outputJobs.isEmpty ) } @@ -114,9 +114,9 @@ object WriteDependencies extends Logging with Configurable { case cmd: CommandLineFunction => cmd.commandLine case _ => None }), "intermediate" -> f.isIntermediate, - "depens_on_intermediate" -> f.inputs.exists(files(_).isIntermediate), - "depens_on_jobs" -> f.inputs.toList.flatMap(files(_).outputJobNames).distinct, - "ouput_used_by_jobs" -> outputFiles(f).toList.flatMap(files(_).inputJobNames).distinct, + "depends_on_intermediate" -> f.inputs.exists(files(_).isIntermediate), + "depends_on_jobs" -> f.inputs.toList.flatMap(files(_).outputJobNames).distinct, + "output_used_by_jobs" -> outputFiles(f).toList.flatMap(files(_).inputJobNames).distinct, "outputs" -> outputFiles(f).toList, "inputs" -> f.inputs.toList, "done_at_start" -> f.isDone, diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala index c773de6155b5a771f242dbbe83a4a21f98089eaa..0e4a29378a8a4334eeed795198c117431bdee8e2 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/extensions/RscriptCommandLineFunction.scala @@ -15,13 +15,9 @@ */ package nl.lumc.sasc.biopet.core.extensions -import java.io.{ File, FileOutputStream } - import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.utils.rscript.Rscript -import scala.sys.process._ - /** * General rscript extension * diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 02c860fdb1719c8c4635d467bea752b74745f9b2..bb6e1bf5606f21e99ceb7d557a93b68b690c0c3d 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -184,6 +184,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config map.toMap } } + object WriteSummary { /** Retrive checksum from file */ def parseChecksum(checksumFile: File): String = { diff --git a/public/biopet-core/src/test/resources/fake_chrQ.dict b/public/biopet-core/src/test/resources/fake_chrQ.dict new file mode 100644 index 0000000000000000000000000000000000000000..e2b0e2af7579a994afedb68a5c495ba794a445df --- /dev/null +++ 
b/public/biopet-core/src/test/resources/fake_chrQ.dict @@ -0,0 +1,2 @@ +@HD VN:1.4 SO:unsorted +@SQ SN:chrQ LN:16571 M5:94445ec460a68206ae9781f71697d3db UR:file:/home/ahbbollen/fake_chrQ.fa diff --git a/public/biopet-core/src/test/resources/fake_chrQ.fa b/public/biopet-core/src/test/resources/fake_chrQ.fa new file mode 100644 index 0000000000000000000000000000000000000000..171b3737bd458bba03d7c457b6ba51e5ef4f774f --- /dev/null +++ b/public/biopet-core/src/test/resources/fake_chrQ.fa @@ -0,0 +1,2 @@ +>chrQ +TCGGCATCTAGAAACTCCAAGTCTGCAGATCTATAACACACGAGGGTATTCAGCCCTAAAGCATTGCGTCTAGGCATGGGTCTTTAATCTCGCCCCGGCAACCTCTACGCGAGCTCCTACCAGTCAACGTGATTGATCCCTCGTACCTAGCATATTCCACGGCGTTTCATAGCCTTGACGGGCACCTTATCCATTTTTACCTAGTTTGACATTAATTGCTTCAAGACTACTGCGGTACGGGTCCCTGCATCCAGAAAGGATGACAAAAAGTGTGACAGTGCGTTCAATCCATAGATTCCCGTCGGTGCGAGGGCTGAAGTTGAGCCCTACAACCCGGCGATAAAAATTGTAACCCTATGCATTGCGTCAAGTTAAGACCCTCCATCTTCGCCACGCGATAGGAATTTGAAACATCGGCGTGCTTAGAAAGGCTATTAGCGCGGGTGGCCTGATCGGTATCCGATAAAGGCGTTGCCACAAATTTTGCCGATTGACCTGCAATTACATCGGGTAAGTGCATTTCCGCCGTTAGAGTATTAAGTCATGACGTTTTCGACATGGGCATTTAATTGGCCCAAGGTGGAATATTCCTGATGTGCCTAAGTATGAGCTCCCCATGAGAATTTGAAGATCATGAAGACTTGAGTCAATTCTAAATGTAGTTACCATCACTTAAGTTCGTTCGCCTTCTGGGCAGGCTATGTCTAACGAGCGACGTCCGCAGTGTGACAATCTAGATATTGGTTGAGCAGGAATACCAAGGAGGTGCTTGAAGTTTCTCTTATGGAACCAACTTCAATCAATAGGTAGTCTCCGTTTCGTTTCCACTGAGACATTTGTGCAGTGGCACAGTATGTGGTCGTAAGCGCATGGTGTTGTTGGAACGAAGACTCTCACTTTTGTTTCCTTTGGTAGTTTAATTTAGCAGTATCCTGGTCGTTGACCAATTTGTGAATCTCCACGGTCTCTCTTTATCAGTCATACCTCTTAAACCGGCTTCCGCTCCTGCGCACATTTATATCCTACTTTCCGTCAATGTGAAAGAGAGATCAACATAATTTCGCGCACGATCTGTCCCATTTCAGACGCCATCGACGGGTCCCGCCTTCACAAAAAACCTGCCACCATGCAGTAGGTACCTCCATATGCTGAGCGTCCGTTACCGGAAGGGTTAAGTCACTGTTTAAAGGATAAGGCGAGGTTTCCCTTGGTGGTGTACTACTGCTCACGTTCCTGCTTTACTCTCCGAGTATTGTTTTAAAAGTGGGCACGCTGGAACGCAGCACCCTATAAGAAGACTCAGTTAGGTCCCCTTCACGAGCATGTTTGAGCTCTCGGACCTAAGACGTCCGAACTCGGTTACGACGGTTAAAGACAAAGCTCCCAGCATGGTATCAGAAGCCACCACGCGTGAGGATGGCGGGGCCCTGGGCACATTGGCCCCGAATTCTTCGCCCACTTAAGTCGGACTCACCACGGGAGACTACGCCCTAACCGGATAATATCTTTTAGATACCATACAGGGTGCAACCATCGCGGCGTCAGCTGAATGTGAGGACAGAGCCCCACACACAGCACTAACAGATCCATGATTTTACTTCGTTTGGGCGACGTGCGAGCCTATTCGGCCTGTCGGACTTTGTGTAGATTCGGGATTTGACCTAGCGTATAGGCCTTTGGTATACTCGAATTATCAGGGTCAAACTACCCTCAAGGAATCCTATTTCAGACGGACGTTTGCCACCCCGGAACGTTGTCGGATCGCTTCTTGCCGCAGGAGATTCATGGAGTGATAATCGCTGGACTCACAAGTGTCAGCGGACTTTCGGTGTCTTGTGGCCTACTTGCAGTGAACACCACCAAACGAGTAGTATTATGGATTGCCGGCGTGTGTTTGTGGCCATGATTGGTTGATGCGACGGCCTAGATTCTCCTACGGGAATCTACCAGGCCCAAAAGCGCTGACTTTTATGTATTTGAGGGGCCGAAATTACATAGTAACCCAGAACAAATACCCGTTAGTTATAAAGTGAGCGCATAAGTTTGGTCGATCCGGCAGTCGAACCATTGCGGTCGGACATATCCGCAGTAGTACACTAAGGCGGAATAGACTGCCGAGTCAACGCTCCCTCATTCTTGCTACCTTAGATCTCGCAGGTTCGACCATTGCTGAAGCCGCTGAATTACACGAGTTGTTTTGTTAACCCCCGGAATGTAGTTCGTACGCCTCAACTGATTCTTCAAAAGCTCACTGCACGTGACTTGTCATGTGTTCCTAAAACATACCTCATCTGTGGGTCTGGTCCCATAAGCATGGAATGTCCGTCGACGCAACATGGAAACCCACTCGCTCGCTATACGTTTATGGTGAGACAGAAACACACTGTATTAGACTGCCACTGATAGCCCCAGTAGCAAGGTGATGTGGCAGGCATGGTACCCAAACGTCTATCGTTTTGGGAACCAAGGGGAGTGCTAATAGGTCCGGCCACGTAGAATGACATAACCTCCAGAGGAGCGCAGGAGTTGATGCATTAAAAGATCCATACTAAACGTTAGCTTAATGCCTTCAAGGCACCAGCTACCTCCATGACAAGGAGATTTCGGAAGGGGTAAGATTTACTTCTGTCCCAAAAGGGTAATGACCCGTAGGGATGGAATCATTGATGAACTCTAAAGGGACTCAGCCGACTAGCCGAGAGGGCTGGACGATCATTTGATGGGAGAATACGCATACATCTAAGTGTCAAGTATTAAATCGGTAATCTCCGTAAAGGCGTGAAGTTCACAGGGCGCAGTTTCCAGTTACTCCAAGAAACTACCGGTTCAGTTATCGCTTCCGGTGCCTTCACAGCAAACATCATTCGCTCAAGAAAGTGCTGTCGCTGCGTGTGGATATTTCCCCCTCGTTCTGGAAAGTGCTCTCAGCGACGCACGTAAACATGCTGGCGACGAGAGAGGCGTCAACACGGTCCGTTACCAAACTGCGGCATTTACCACGAACCTGATTGCAAAGTGAGATTTCCGTAAGGAGGTTAGCCAA
ATATTACGTAGAGTGTTCCACACCAAATCCGTCGTCCACATTCGCGACGGCAGTCTAGACGTGTAATTCCCCGGATAATCCAGTTACTACATGCTGATGCAGTCATAGTGCACGCAAATGCGCAACTTAACAAGCACGACCTGAAACAGAGAACCCCTGTGTAGTCAATATAGGATGACGGACACACACACTTGCTGCTGCAATCTTACATTCTGCGAACGAGTGCAAAGTTGAAATCATGACGAACAGCCTTGCTTTTCAGAGTCTCTATCGAACTCCTTTACACCTCCATATCTACTTGCAAATCACACTAGAGGGGCGCAGCTTACTCACTGAGAGATGGTCTACCTAATCGATTTTCGGTGAACTTTGAGTACAGCATTGAGTCTGGAGGGTTCCACTACTTTATCGTACCGGTCCGACATGATTTCTTATCGAATAGATGTTGAGATGGACATTAATAAGCATAGTACGTCTCGATCGATGGCTACCTTTACGTCTATGAGTGCTTACATAAGGTCTCTCGTAAGTCATGGTCCCGCGGGGCTCGCGCAACATTGTGGATTAATGACTCCAGTGACGCATGTTCGATTCGCATGAAGTAGGTGGCGCGTATTCATACATGAATAGTAGGCAGAACGAGCACATTGGACCGATCTTGGAGGTTGGGCTTGAGGTCCCGCACTGATAGTTTACGGCCATGAAGACGACAATTGTCAATACTTCTCTATCCTGAGCGAATGCTGACGGGGGCCAGGCGGAAAAGTGCGACACAGTCTACTGATTACGTGTAGTACGTGGCTAAGCATATTCCCGGGTTCGCAAACATAGGTCTCTGATGGGGTATGGGTAAGAAATCTGAAGGTTGCGTCTCTCACCACGGTCAGGATACCGAATCAGCTCATAAGAGTTACAAACGCGCAAATGAAGGCCTAGTCCACAGGGGTGTCATTCGCACGAGCTGGGCTTAGAATCACTGTTTTCGGTCGCACCTGTAGGGGGACATGGGACGGATTTCACAACAAAAAAGCATCTCTCATGATTCGATTCAGATAGAGGAGAGGAGGTAAATGCCAACAAATCTATTCTTTAGTACCGCCTGACGGAACGTATTTAATCCTCGCCTCAGATCGACACCGCAGGGTAGCTGAAGACGTCCGTTCTTAGACATTTAGTCGATCATTTGTTATGAAACAGAACTAGGAGTCCGTGCCCTTCAGGCCGGCTCAGGGGCACCTACCTCCAGATCGCCCAGGTTGGGTTTATTAGGCGCCGAAAAGTTACTGCCCTATCAGCCCCTCCAATCCGACCTACGGACATCATCCCACTGGCTCGCAAAATATAAATTGCGGATGGGAAAGGATAAGGAAAATCATTACCTACACAGAAGGACAATGTCAGTTCCAAATAACACTGATACTTTCGGAGCAACTTGGTCCGGAAATGTAAGTACGACTATAGCCCTTTCGACCAACGCCGACAGTCCTATTTGGACGCCGAGAGAGGCGACGGGTAGCCGAATGTAAAGCTCTCGGGTCGCTCTTGGCGGAATGCGCTGCGGGTCCTACCCTAAACCCTTACCACCACCAACTTCGTTAGGAGCCGTATAGATTACAGCTCCCGCAAAATTAGAGAGGAATCTGAGTTATTAGCTGAGGACCCCGCATTTTCTGCGACGGCGTAGCTGCAGTGACGTACGATATGAGTTCCCGACTGTGAGGGAGTCCCAGTCGTGACTCCCTACAACGGCTCCAGATATTGTTACTTATGGTCAATATGCCCCGACCGCCCATTGTCTCGAGTACAGTCTTCCCCAAAGTTAAGCTGTGCATTACCTTACCGTTTTAGGTCCAGCTGGTAGCACCGAATGCTGCGCAATCCGAGCCCCCGAAATAGACTACGTGTCCACGGTCAATTGTCATGGGTAGCAGAGCTCAAAGAGGAGAAACGTGCCCCGTAAACCTATTAGATCTCGGTTGATAAATATCAGGCCACAGCAGGCTGCCCGATGCTTGTTTGAACAACAACTTCGGGAGCCGCGGTCCTTGGTTCTCCCGATATTCGGCCGCACCGAACGGTACGCGTCATCGCGAGGTGCGTTCTCGCAGCAAGAAATATTTGTTGTTGTTGTCTTCCTTCCGCATAGGAAACCTTAAGCGGTACCTTTCTACGAAGTTGAACCCTAGAAGCACGTGTAACAATTTTTTTTACGCTACACCCGGATCTGCTTCCATCTGTTGATCATATGAGCCTAATGTGACTAATCTGTGCCGTCGATTGAAAATTCGTTCTGAACCTAATCACATGAATTAAAATTAGGGCGAGAATTGGCTCCTTTTGGGCCGTAATCCTTCAAAGGGTTAACCGAATTTAGCCTCCACGGTGACACAAACTCCCATAGGTAAGGCAAACCCAATAACGAGGAAGCCTTGCCCACAGCATGTTTGATAAATACCCTTAGGGTAATATCGCGTGCAATACTGAAGCCGCTCTTCTAGCATCCGTGTTTGACATACTATGACCTTGAAGCCTGCCGCAGCTTCTAGGTCATCCAAGTAGATCAAAACGCCATGTTGTGGATCCATGCATCTTCCCAGTGAACATGGATCTTAGTGTGACAGGCGAGGAGCGGCGAACACTATCGGTGTGGCAAGCTCGGGCCTTCGTACGTTGTGGAAGTATGCGAATAAGGAGACCGTAATGTATCAAGTTCTTAAGAGCCTTGGTACCGTTGCAATTCGGCATGTTCCTACAGAGACACTCCGTGTTTGTCATCCGTCATAGATCTATGGCGTAGTTAGCGCCTCTGAAGTAGTTGTCCATTCAGCAGGCATTGCTTAGGGAGTTTCTGGCGCTTGCCGCTCAAGATGCTCACGGGCCTAAGTAGCACGGCAACCTTTTGACAAAGCATTTTATAAACTGAGCATATTGGCCCGAAACTAATCCAGCAAAGGGTGAAGACCTGTCAGCGGGCCCAGAGTGTGAACGGTCTACTGCGCGGTACATAAGTGGCGTAATCCATCAACAAGACCTACACGACCTGAATGATTTCCAACAACTTTATATGCTTTTCCGCATCTCGAGAGTACCGGAATCTATGCAATCTCCCAAGGATCCGTAGATTTGAAATTCAATCCGACGGGGTAAGGTTGCCGCGCCGGTTAGCTAATGTGCGGATTTATAGTCTTTTTCCCAGAAGGCGTAGTTAGTTTCGCACCTAACTACGACACATACTTGGGTCGACTGTTGAAGGTGGTAAGTTGCGAGCAGTCCGCCGCTCTCACGCGCCGAACCACGTTCATATCGGCAAAGTTGCGCGATGACCTATAGGTGTGCAAAGCTCGTCCGACATTGGGATTGGATTCACGTACATACGTTAGTATCATGGGTAAGCTTCCATGTCAGCCTCGTGTATAGCACCGGTGCGCCGCGCGTTAAGGATTCTATGCCCAGCAAATGTGCCAACGTTGTGGGGAGAAAAGTGTAGTTGGATGCGATCGTGACATCGGCACACCGAAACTCTGCAGCCAGTCCCGCTAATCTCATTGGCACCGG
GTAAGAGATTACCTTTGGTTAGGAATCGCGTGCGACGTACTGCACGAAAACAGTGCCTGAACCGAGGTGTTTACTTAGATGGTTCTAGACCCAGCATGTTCCTCACTGGAACCTGACGTCGGTACGTGATCCTCTATACCTCCTTTTCGGTATTGGCCTGGCAGCTACTCTAACTGTTTGGGCCGCGCCGATTTCTCGAGTCCACACGGCGAGGTCAGCAAAATTGCCAGTTAGTGGATGTTGGGATCTCAACGCATTACCATGAGAGTTCTTGGTTTACCCGTTAACATCGCTGCGCACGGTGTGAAAAGCCTGTTTCTTTGGCCCCCATCATCTTCGGCCCGCAGATCTCAGATCAATGATGTAAGGTTGCGGCGGCAAAGACTAGACTTGAGTCGTGAGATGGTGCTTTGCTGAGGCCGTCTCCTATAGCTTATTCTAGGACTTTCCGCAAACCACCCGACGTGCGGCTGTCCACGATCGGATTCCATTCTGTCTCGGAGCATACAGCACTAGATTTGCCGCTTGAAAAATGTTCCATAACCATGATTTCAACCCCATCTAGTCGGCAGGCACAGCTGAGAACAGCGAAGGGCGTCGTGAAGGGCATTGCCCGTAGTGTTTCAGACGTGCTAGAGACTAAATCAACTATCTGCACTCGTAGCCTGGCGTGTGAGATGTCACCACGATGTGCCTAGAGGAGTGATTATGAACATGTATTACCACGTCCGGGTGTCGACGGCTATATGGCTAACATTTCTTATGGCTAGACGTGCTTGGAAAGGTTCCCCAGCCTTCTGTTTCCCGGTGCTTTCCACGAGTCTGGAGTTCTGGTAATTAACTACATGGCGTTAACGCGGAGGTAACCCCCAGTCATTGCATTGCAGGTAGGGCTTAGGTGCAATATAATTCACCAAGGCGCGGATTCCTCACGATTGTTACGAAGACACCCGGAGGGTTTCAGTATGGCTTGAGAAGTGTACGTTTTTCCGGCCAGGGTGTAACTATAACCAACACATGTTTGGCCACGGGCTAAGTCGGTCCGCACGACTGATTTCCCCCGCCCATGTGTTTGGGAGCAATAAACTGCGTCTGCCAAGAGTAACAACTCGAGTAGAGAAGGGAAGTCTCAGACTATTTTGCAAATCAGACTGTAAGGCTCAACAGCCATACAGCTTGCCCTACTACTGAATACTAGCGTAGCGTGGCCACATAGGAAAGACTTCATGTCTTCTAATAACCTTTTACCTCCAACGTCCCCGCCGTCTTCACGCGGTCCAACGATGAGGAAACAACCACCCCTATCTTCCGCGGAGTGGTTCACACGACCCCCGGCGTTAACGCGCACGTTGTTGTCTTTCGGGACGGCACTACCCCCAAATGCCCAGACCCAGTGCTAGCGATATTCAAACGCCGTCCGGTAAGTCCTGACGTTTTTCAACTGGATGCACTGGCGACACGTAGTTCGCAAGGCGTCCATGAGAGGTTTTAACCGTCATGTTTCCGTATCACGTCTTATGTCTGTCTCTATTCTCAGCGAAATTCTCATCATAGGGCGGAGACTATCTGAAGGCCAGCGAATACAAGATTTAATATCAAATATAGCATGGGGGCCAACAGAGGCCCCCCTGGTGCTGACGAATTATCGTGATATTAGTACAGCTGTCTGCAATGCCATTTCGAAGGCTTTTTGTTCGTATCACTGCTCTATGCATAGCGGTCACTATGACCTCTCAGCTTGACTCACCCGAATGACCAATTGTGGTCCAGCACTCCCTCATCTTCCCCCATTAACGATACGTTGGGCACCATCGGTGTGAGCTACCCGTTACAGTCATAGAATCGTTCTTTGCGTTGTACGCGGCACGGAGGTGACCGGGAAAAGCGCCGCGAAGGCCCCGCACTGAATAAAGCTAGTATTAGCGTCTGTCAAAGTGTTTTGACACCTAATTCGCTTCCAAGTCCCAATATCTAATCTAGCCTGCTTTGGGCCAACATCTCATTGCGTTATGCTAATGAAGAGGGTGCGGGATCACATCCGCTCTTCTCTTCCTATACACAGCGGACATTCGGGTTGGACGTTTGGAGTGATAATTTATCGTTAGGGATAAGTATGTCGGCGCTTAGTAGTATAGCCCGCTGACCAGCGTTCGATTTCGAACCTTACTGGACATTCTCAATAACTACTGATCATGACGTTTTCCTCAGTTCCTAGCCTTGACAACTAGCCACAGTCAGCATGGTAGAGAGCGTTGAGCCGGGGATAGCCAGGCTATTAAGACAAAGACCCTCGGGCCCCTTAATGCGCGTCAAGTCTGACGGTTTGAGTGCGGAGCAGTAAGCGCTTTGGTATAACCGTGACGTAGCAGATCCATGCTTCGCCCGCTTCCACCTGAGAGATACTAGCCTCTTTCGCACTTTGTAGGATTACGGGCAGCGAAATATTTATCCTGTGCGGCGAGCCCGCTTCGGTTTCGAGCTCTATCAGTGCGCGGTTGGCACTCCAACGCACGATAACATATACCCGCCCACAAGGCCATGCAGGTTTAACCTCCTATTCTGATTGTACCTGGCTGACTTTACGGTACCCACCAGCGCAGGATTAATAGCCTAATTATGCTAACCGGTGCTAGTCTAACTGCTGTTACTAGTCCGCCCCAGCTACCCCACGGGTCAGTAACTGCACCAGCAAGCATGGTTCTCCTCCTGAAGTTGTACGTTCGAGAACCCCGTATCGAGTTGGTATATAAATTAAGGGTTGTCTAAAACAGAAGCCTATTCCGCTATCATCGGTGTAATAACTGATCGCGCCGTGGTTAAATGGAGGAGCACCCGCATGGATACATCGCTAGCGTCTTGTAACTCTCTGGGGGCCTAGTATGGAACGGAACAATGACATCATTGCTTACGGGGCCCGCACTTAGCTGTCGCGTATCGCAAATCATATGGCATGTCAGTCCCGACATCACGAAAATGACCCCATCTGAGGTGGTCGGGAGGCGAACAGTCGAATATGATGTATGCACCCGCAACTTAATGTTCAAAGGCGGGCGAAATGCCTTCTCCCGTCCGGACTATCCTGAGTGCTAGCCGCGAGTCTGTAAAGGTTGACGCAACCATATAGCACGCAGAAAAATCACTCTCACACCATGAGAACCATGGCGGCACGCTGTCTACTTTGTCTGACAGGCTACGGAAGGAATGGTACATACGTACAAACGGATGATATGATATCGGTCATTGCCTATTGTGACGCTACCCTACTGCATCACCCCCTTAGAATGCGTTGGACGCTCTATAGCAGATCCTCCATCCAGTGGAAGTCTCGTCGCCGTGGTTTGCCTTAACGACCGTTGGAGAGAGCAGGACAGAAATATCGCCCTTTTGAGCGCATTATTTGGAATCGAGGTAAGTCAGTGCGGCATAATCGCGCCTCGTGAGCGGAACAGTTTTTGATCCCACCCGCTAAATGCCAAGGTGCTGTAACCTGGGCGCGACACCAAAAGACCACGTGCTGTATGAAGCATGTGTTCTAGCGCACTCTCAACCGTTACC
CCGAGAGTAAAATGTTAGTTGTAGGCCGATTCTGCAATGGTAATTGGCGGAGTGTCTAGGGAAATGTTTCGGTCATACTTAACCGGCTACCTCTTCCTCCCTCAGATTCGGTCTGAGATGAGATATACTGGGTGAGTTGAGTCGCCCTGTATCGTTGCGGCGCTCGTGGACCAGACAGACAGTTCCCGTTTATCTCTGCTTCTAGATGGAGGGTCGCCTCCGTGTTAACGCCGGCGAAGGTAGTCGCAGCTGAAGTTGTGATGCACAATCAGGTGAGCCTTTTAAGTATGGTCCTACGGACGTGAACAGCTGGGCCCAGTCATTTAGTACGGGGGGTTTACCTATAAGGATACGGTAAGAACGTCATCTATCCGTCCCACTGGAGTCCGAGGGGTTCGTGTCTACACGGATTACTTATCATGCACACACGTCTACGGTCATGCATAAAGTTGTGCAGCGCAGCAATCGGAGCGGAGTTACACCATCTCCCTATTAACAAGGCACTTATTAGTACTTACCCCGTTATAGAGCTCTCATCTTATCGATAGAGCGCAGTCCTAAGTATTGGCTCGAGTGATTCGCTCCTCAGCCCTTGATTGTAACTCCCCCGATTGCAGGTTGTATGGTGAGTAAAATCTCTGCGCCCTTCTGTTCGGATAAAGAACCCCGACCACTAATGCCCGCCTGCTTGTTGGGCGGTAAATGGGTAACGGAACATGGACTATGAGTGCGATGATGGTCAATAGAATTACCTTATTACGCAGTAAAAGGAATGACGCAGACAGGTATTTGTCGACGATTGCTTCGAACCTGGCAAAATGGGGAGGTATCCTGTCATGTTCATCTGTAAAACAACTCCTGCCTCTTCGTAGAGGACACACACTGTGGGCCTTTAGCCTTTAGCAGCCCATTGGGGCTTACCAGCTGTCGTCATGGGGTATCATTAAGATCCATGCGCCCCCGAAACTTACTGCAAAACAATATGGCTTAAAGGTAAAGGGACCATCAGGAGAATGCTTAAGAGCGACATATAGATACGTATTTAATTAATTTATGTTAACGCAACCATCTCGCAGGAGTCGCATAGCATATTGCCGGGTGATAGTTAATGCACTGTGCTTCCGTGTTTATATAAAATAAGCAGTAACCTCTGACAGGTTGAGACTCCAACAAGTGCTCCGGGTATTTACCTTCTACCATGGCGTTCTAATATCACGAAAGAGAAATTGTGTGTACCGATGCCAGGTGACCGCCCGCGTGCGCCAACGACGCAATCTAGAGCATCCACGCTGAATTGGGGAACTCTTGCCGTTCGTCGCATGGTGTACTTGGTACCACTCGATATGCCTGATTAGGTTTGGCCGTAGCACGTAAGGTAGTGACTTTCCATTCAAGCTAGCGAAGCGACACCACCACAGTGCCCGGTCAAAATAACCCACACCTGGCCAGCATAGAGGCTAAAATAGCTACAGTGCGCTAATCGAGTGTTTTTGCATCGGCTCGTGGCTGGTGGACTCGGGACAGCTTAGAACTAACTCTGGTGTACAAACGCGATCGTAGCTCTCGCGACTTACTCACCGGAGTAGGTTAGATGGACAAGACCTAACCCGAAGCCTAAATCGCCCTGAGTGTTAGCCGCCATTCAATTCTATGGTTTATCGGGGGCGTCTATGGCTGCGACAGTATGGAGGCCCGTTATGGGCACCCGAGTATCGTACCATAGTAATCCCATATTCCTCTTCGAGCGACTATTGGATCAACATACCTACAGGGTAGTATGAATGTTCTTGATTACAGAAACCATGGAATCGGCGCATTCTATGTTTCACTTCCGAATAACAGTGAGCAAGGCATGCCCTTGACAAGGATCATCCCGACAGCAAGCCGATCGGGCCCTAGAGCCCGACCCCCAAACAGAACACCGGCCACGTAGTTGCTGGGACTAAACAAAGGTGTGTTTCCATAAAAGGAAATCTTCAAGTGTATTGTTGAGTCGTAACGCTTATATTTATGGCCCAATGGGCGTTGCGAGCACAGTAGCAGGCCTAGATGAATGCCTAGGCCACGATCGGGGGGAGGCTCATTGAACGTACTGCCATACCAAGCCCCCGTATGCTATGGCAGGAGGGGTTCTCTTCGTATAGAGCGAGGGTCTCTACGCCAAGCAGCATTCCCGTGTTGGGTGGCCAATGGGGCTCACTAGAAACTCGGTTTTTTTAGCGAAGGAATGAGCAAACTCGTGAAAGGTGGTACACACCAGTTGCGGCCGATTTGTTGTAGCAACAAGGTTTGAAGAATTGAGTAGATGGGCCAATTTACCTCCTATTTAGCGAGTGAGATGGCGCATGTTTATTCAGACTCCATGTGGGGTAGAGGCTAATCGTTTAGTAGCAATAACCCCGCGGGGCAAGAGACCGTAATAACTTGAATCTGTGGTAGCTATGAATATGTGCTTCGCCCTAAGTGTTATGTAACAAGAGTGATCCAGGGGCTCAGATCACACTTAGTACGATCCGCTACTGAAATGCGGCCGCGGGCTTGCACGCTGGACATAAGTCGGATAATCAATTGCCTACGACAGGTTCAGCCATAAGGCTTGGCTCCTAACACACTCATGATGTCTGGCTTTTACTCGTGCCCGGACATAAACGTATGCTCAAACGCGAGACAGGGGAGGGTCAGCACCGTTTAGATCTATAAGGCCTACCGGTAATATGGATCGACAACAAACAGATGCTATAGGGATACCTACTCCTTTGGACCCACATGTAGATGAAGGCAAACACGCAGAGCAAAGGAGAGTAGTCCACCCGGTATAAGTTTGTGCTTTGAATTCTGGCTACGCAGACTTGCACTCTGTCCCGGCATTCACTATACTTCTCCGGAAGTCCTTTAAGAAATGTCCGCGCTCATGTGGTTCCCGTTGCTCAGGGGCCAACTCAAGTAGATCTTTAAGGCGCAGTCGACCACAGGCTACTAGATACGAGTTATACTTATCCGGACATCTGGCTAAATACTTGGATACGATACTTCCCCAGTCGTGAGAACGAAGCTAATACAGATCGAATTTCGATGGTTCAGGCAGGCAGTTCTCAGGAGGCAAGGTGTTAAATAGTTTCGGAGGCTCTTTCGTACGATCAGGGTCTACTACCCTAGGGCATTTTGACTTTGGATTAAATATGCAAAATGCAAGGCCGATTGTGATCAGTACTGATACTCCAACTGGACCACCTTCAGACCCTTCGAGGGGACCTAGACGACGGGAACCCTTCCAGCGGGTGATACCAGTTAGAGCAAGTCACAAACACGATTCAGCCCCCGGGGTTTATGACGTACCATGCGAGTAATAATGCACGTATACGGAGCTCTTCCACCGAGCGATGGCATTTCGGGGCGAGGTAGTTGTCTTTCATTGGCATCGCACAACCCCCATCCTCTTAATTGGCATCGTCTCCAGCTGGAAAGAATTTGAGTGAGCATGTCGCCCCTATTATTCCGTTGCCAATAAAGTGTCTCAACTTTTGGCGAAGGTTTTAACGCATACAAGGAGAAGCC
GCGAGACGTCTGTACCGCTGATCTGGACGCAAAGTGCTCGGACTGCCGCTGAGTTATCCTGGACGCCATGATTAGAGCCGTCGTCACTACCTGCATACATGGGCCGATAGAGTACTGCAACCAACAACTCACTTAAGCTCCACAACGGCTGGACACTTCCGAGAGCGGTCTTACACAAACGTTAGGTCCTGGGCCGCCGACCTTACCGCTAGTTAGTGAGAGCCAGTTAAAATTATGAACGCTCGGAACCTTCCCAACAGTGGCCGCAGCCTTCCTTGACGCCTAGCACATCTGGTTTATACTCGGGTATGCCGTAGATCGGTAACCTAGGGAACGACCCTGTGGGTTTAACACCCGAGTGCGTAATCAAGCCTAGAGGCCATCTCAACTCGAGAGGTCTCCTGACAAAGAGGCGCCCGATGAATCATCCAGAGGCGTCTGGCGGTCCTACGAGAGTGGCTTTGGATGCCTGCCCCTTGGATGGATCTGTCTTTAATCGGCGCCAATACCTAGCACTGCTAGGCTCCAGACTGTGTTTACATGCCGTAACCCTGATACTCGCAGAAACGTTGCTGGAAATTCCTAGCAGCTGAAACCATTCCCCGTAACGTACTAGTACGCTAAGAGAGAGTCTCTCCTGGCCCTGATGAGTGTGTTCTCATCTGGGGCACGATACAAGAATCGGAACGAACGCAATGCCGAAGTCCCTTGTACCTTAATTTGGGCGACGCAGATAGACCCAAAGATCGCGGACTACGGAAACTAGCATAGGACCTGTGTCGAGAAGGTTCGAGCAGGTAGTGACACGCAGCGCGGTGGCCGGCGGGGTGGCACATTGCGGGTCAATACTGGTAGTAGCCACTCTTTGGACATAGCGGCGGACCAGCGCCTAGAATGTCTCATTCTCATTTTGTTCCGTGGCACGTTACGTAATGACGGCCCGCCAGCACCTGTGTATGGACTTGTAGCTCGGGCCTCTGGTCCTGGCACGACAAGGCACCAGCCAGTAATCTCTCCTAAGGCGCTAGCGTGCATAGCGCGTCTGCCTACCGCCAGAGAACGCGTCATCTGCAAGACGTCCCAGCGTAGTGAATTGTAACTGCAAGCGTTCTCTTACGGTCATAGTGCCGATTTTGAGCAGTAATGGAAGCAGCAAAATGCCGCCCAAGCGATTCGCAAACTTCTAACAGAGCTACAGCCGGACACGACGCGGTGGTGCTCGCGGTTGGTGATCTTATGATATTAACGCCCATAGCGGCCATCTTAATCGACACCATGTTCGTTTTGGCAGGCCTTGTGGTAAACACGTGCTAGTGGCACCACCCATGCCCGTGCCCATACATCCAAACCGAGAGAAAGCCTATTTAAGCGAAAACCACAACTTCGAGGTTTCACCCCCTGCCATTGATAAAGCGAGGAGTACCCCCGATGCCGGGAAGCGTCCGCACCCATTTCTTTCGTTCTGGAATCCTCGGGCGACTTCTCGAAGATACTGTGCTCACGACCTGGAGTATCATGAACAATCGGAGGAAAATGAGTAATTGTCGAGTCGTTGTTAGACGGCACTTCCGTCCGGCCCAACTGTTCTCGGATACGTGTCCCGTGGTCAATGCTCTAAACCGGCTGCCGGCGACTCAGTTCACTGAGACAAATTCTGATGCTTTCGAAGCAAGGATGCGCCCAGAGCAGAGCTGCCCAGATGAGGTTAAGAACGTAACTATAATCGATCAGCCATTCGGCTTAAGGGGCCCCGGCGAAACGCGAAACACTTGGCACATGGACGCTTCACGCGCAACAGTAGTTGTCTCTTTCGTGAGCCACCGTAGCAGCTAGAAAGGCCTATCCAGTGATGCTTTATGACTGAGTGTCGAATCTAGGTATAGCATAGACTGGCTGATCGGGCGGGTCGGCCCACCCGTCTCGGTCGAGCGGTTCTGACTTTGGGTGGCTGTGTGAACCCAACTGCAGATGGAGTTGAATGGGTACACCCTATGCGAGGCCTCGTCTTTACACCAAATCGGGGCCCTGTGAAGTGCCACTCTTTTCCAGCCGGCAGCCGCTCAGTCTGATTTTGCTTGTACATGTCGTGTGCGAACGTTCCGGGAGGCTTCCGTGTTCCAAATACCGTGTTCTCATATTCGGTCCATCTACCGACGGAGAGTTGGGATGCCCGGGCCCGGAAATATAATTTAAACTCGTGGCCAAGAATTTAGCATGTTGTAAACATGAGAGACAGGGCCGGGCTAAAACATTACCCCTGAGTAATGTAGAGCCACAACTGAACATAACATTGGGATCTAACGCACGCAATCAGTGTAGCTTCAGCCCACCCTCTAAATTTCCCCCGGACAACTGGATTATCACCTGCGTCACGCGATAATTGCTCGCATCTCACCAACACACTTCGACAAATCTGGAGTCTCCCTGGTCCGTACGTCCAAAACCGTTTAAATGGGCGGGTGTGTCGTGAACCAATCTCCTCTTCCATTTGTCACATACTGGCGATGACATCCTTTTACTTGAATTATTCATCCGGGCACCAGCCGCTTTCCCTACGATCCCCGACACTCGGGGCTTCGGGAGTTGCCCGCCAAAAAACCGACAAACCAAACTATACAATCAATCCCATCTAGATGTAGGGGACTGAGGCTCTAAGCTATGCGCCTACTATACTTTGTAGGTATCAAACTACGCTTGAAGATAGTTGATAAGGAAGCGAATTGATCGAGTACCGTATCTTCAGTCCGACTCCCGTTCGAACGCAGCACGCTAACATGGTCCACTGGCATTCTTACTAAATACCTAGTTCACTTCTACATGAGGAGTGTCTGGGCCGGACTCACCTTTGATTAGATAACTGAAG diff --git a/public/biopet-core/src/test/resources/fake_chrQ.fa.fai b/public/biopet-core/src/test/resources/fake_chrQ.fa.fai new file mode 100644 index 0000000000000000000000000000000000000000..b7a558fdb3b3c0e85f6e3c634cc3ae80c601336d --- /dev/null +++ b/public/biopet-core/src/test/resources/fake_chrQ.fa.fai @@ -0,0 +1 @@ +chrQ 16571 6 16571 16572 diff --git a/public/biopet-core/src/test/resources/fake_chrQ_no_index.fa b/public/biopet-core/src/test/resources/fake_chrQ_no_index.fa new file mode 100644 index 0000000000000000000000000000000000000000..171b3737bd458bba03d7c457b6ba51e5ef4f774f --- /dev/null +++ b/public/biopet-core/src/test/resources/fake_chrQ_no_index.fa @@ -0,0 +1,2 @@ 
+>chrQ +TCGGCATCTAGAAACTCCAAGTCTGCAGATCTATAACACACGAGGGTATTCAGCCCTAAAGCATTGCGTCTAGGCATGGGTCTTTAATCTCGCCCCGGCAACCTCTACGCGAGCTCCTACCAGTCAACGTGATTGATCCCTCGTACCTAGCATATTCCACGGCGTTTCATAGCCTTGACGGGCACCTTATCCATTTTTACCTAGTTTGACATTAATTGCTTCAAGACTACTGCGGTACGGGTCCCTGCATCCAGAAAGGATGACAAAAAGTGTGACAGTGCGTTCAATCCATAGATTCCCGTCGGTGCGAGGGCTGAAGTTGAGCCCTACAACCCGGCGATAAAAATTGTAACCCTATGCATTGCGTCAAGTTAAGACCCTCCATCTTCGCCACGCGATAGGAATTTGAAACATCGGCGTGCTTAGAAAGGCTATTAGCGCGGGTGGCCTGATCGGTATCCGATAAAGGCGTTGCCACAAATTTTGCCGATTGACCTGCAATTACATCGGGTAAGTGCATTTCCGCCGTTAGAGTATTAAGTCATGACGTTTTCGACATGGGCATTTAATTGGCCCAAGGTGGAATATTCCTGATGTGCCTAAGTATGAGCTCCCCATGAGAATTTGAAGATCATGAAGACTTGAGTCAATTCTAAATGTAGTTACCATCACTTAAGTTCGTTCGCCTTCTGGGCAGGCTATGTCTAACGAGCGACGTCCGCAGTGTGACAATCTAGATATTGGTTGAGCAGGAATACCAAGGAGGTGCTTGAAGTTTCTCTTATGGAACCAACTTCAATCAATAGGTAGTCTCCGTTTCGTTTCCACTGAGACATTTGTGCAGTGGCACAGTATGTGGTCGTAAGCGCATGGTGTTGTTGGAACGAAGACTCTCACTTTTGTTTCCTTTGGTAGTTTAATTTAGCAGTATCCTGGTCGTTGACCAATTTGTGAATCTCCACGGTCTCTCTTTATCAGTCATACCTCTTAAACCGGCTTCCGCTCCTGCGCACATTTATATCCTACTTTCCGTCAATGTGAAAGAGAGATCAACATAATTTCGCGCACGATCTGTCCCATTTCAGACGCCATCGACGGGTCCCGCCTTCACAAAAAACCTGCCACCATGCAGTAGGTACCTCCATATGCTGAGCGTCCGTTACCGGAAGGGTTAAGTCACTGTTTAAAGGATAAGGCGAGGTTTCCCTTGGTGGTGTACTACTGCTCACGTTCCTGCTTTACTCTCCGAGTATTGTTTTAAAAGTGGGCACGCTGGAACGCAGCACCCTATAAGAAGACTCAGTTAGGTCCCCTTCACGAGCATGTTTGAGCTCTCGGACCTAAGACGTCCGAACTCGGTTACGACGGTTAAAGACAAAGCTCCCAGCATGGTATCAGAAGCCACCACGCGTGAGGATGGCGGGGCCCTGGGCACATTGGCCCCGAATTCTTCGCCCACTTAAGTCGGACTCACCACGGGAGACTACGCCCTAACCGGATAATATCTTTTAGATACCATACAGGGTGCAACCATCGCGGCGTCAGCTGAATGTGAGGACAGAGCCCCACACACAGCACTAACAGATCCATGATTTTACTTCGTTTGGGCGACGTGCGAGCCTATTCGGCCTGTCGGACTTTGTGTAGATTCGGGATTTGACCTAGCGTATAGGCCTTTGGTATACTCGAATTATCAGGGTCAAACTACCCTCAAGGAATCCTATTTCAGACGGACGTTTGCCACCCCGGAACGTTGTCGGATCGCTTCTTGCCGCAGGAGATTCATGGAGTGATAATCGCTGGACTCACAAGTGTCAGCGGACTTTCGGTGTCTTGTGGCCTACTTGCAGTGAACACCACCAAACGAGTAGTATTATGGATTGCCGGCGTGTGTTTGTGGCCATGATTGGTTGATGCGACGGCCTAGATTCTCCTACGGGAATCTACCAGGCCCAAAAGCGCTGACTTTTATGTATTTGAGGGGCCGAAATTACATAGTAACCCAGAACAAATACCCGTTAGTTATAAAGTGAGCGCATAAGTTTGGTCGATCCGGCAGTCGAACCATTGCGGTCGGACATATCCGCAGTAGTACACTAAGGCGGAATAGACTGCCGAGTCAACGCTCCCTCATTCTTGCTACCTTAGATCTCGCAGGTTCGACCATTGCTGAAGCCGCTGAATTACACGAGTTGTTTTGTTAACCCCCGGAATGTAGTTCGTACGCCTCAACTGATTCTTCAAAAGCTCACTGCACGTGACTTGTCATGTGTTCCTAAAACATACCTCATCTGTGGGTCTGGTCCCATAAGCATGGAATGTCCGTCGACGCAACATGGAAACCCACTCGCTCGCTATACGTTTATGGTGAGACAGAAACACACTGTATTAGACTGCCACTGATAGCCCCAGTAGCAAGGTGATGTGGCAGGCATGGTACCCAAACGTCTATCGTTTTGGGAACCAAGGGGAGTGCTAATAGGTCCGGCCACGTAGAATGACATAACCTCCAGAGGAGCGCAGGAGTTGATGCATTAAAAGATCCATACTAAACGTTAGCTTAATGCCTTCAAGGCACCAGCTACCTCCATGACAAGGAGATTTCGGAAGGGGTAAGATTTACTTCTGTCCCAAAAGGGTAATGACCCGTAGGGATGGAATCATTGATGAACTCTAAAGGGACTCAGCCGACTAGCCGAGAGGGCTGGACGATCATTTGATGGGAGAATACGCATACATCTAAGTGTCAAGTATTAAATCGGTAATCTCCGTAAAGGCGTGAAGTTCACAGGGCGCAGTTTCCAGTTACTCCAAGAAACTACCGGTTCAGTTATCGCTTCCGGTGCCTTCACAGCAAACATCATTCGCTCAAGAAAGTGCTGTCGCTGCGTGTGGATATTTCCCCCTCGTTCTGGAAAGTGCTCTCAGCGACGCACGTAAACATGCTGGCGACGAGAGAGGCGTCAACACGGTCCGTTACCAAACTGCGGCATTTACCACGAACCTGATTGCAAAGTGAGATTTCCGTAAGGAGGTTAGCCAAATATTACGTAGAGTGTTCCACACCAAATCCGTCGTCCACATTCGCGACGGCAGTCTAGACGTGTAATTCCCCGGATAATCCAGTTACTACATGCTGATGCAGTCATAGTGCACGCAAATGCGCAACTTAACAAGCACGACCTGAAACAGAGAACCCCTGTGTAGTCAATATAGGATGACGGACACACACACTTGCTGCTGCAATCTTACATTCTGCGAACGAGTGCAAAGTTGAAATCATGACGAACAGCCTTGCTTTTCAGAGTCTCTATCGAACTCCTTTACACCTCCATATCTACTTGCAAATCACACTAGAGGGGCGCAGCTTACTCACTGAGAGATGGTCTACCTAATCGATTTTCGGTGAACTTTGAGTACAGCATTGAGTCTGGAGGGTTCCACTACTTTATCGTACCGGTCCGACATGATTTCTTATCGAATAGATGTTGAGATGGACATTAATAAGCATAGTACGTCTCGATCGATGGCTACCTTTACGTCTATG
AGTGCTTACATAAGGTCTCTCGTAAGTCATGGTCCCGCGGGGCTCGCGCAACATTGTGGATTAATGACTCCAGTGACGCATGTTCGATTCGCATGAAGTAGGTGGCGCGTATTCATACATGAATAGTAGGCAGAACGAGCACATTGGACCGATCTTGGAGGTTGGGCTTGAGGTCCCGCACTGATAGTTTACGGCCATGAAGACGACAATTGTCAATACTTCTCTATCCTGAGCGAATGCTGACGGGGGCCAGGCGGAAAAGTGCGACACAGTCTACTGATTACGTGTAGTACGTGGCTAAGCATATTCCCGGGTTCGCAAACATAGGTCTCTGATGGGGTATGGGTAAGAAATCTGAAGGTTGCGTCTCTCACCACGGTCAGGATACCGAATCAGCTCATAAGAGTTACAAACGCGCAAATGAAGGCCTAGTCCACAGGGGTGTCATTCGCACGAGCTGGGCTTAGAATCACTGTTTTCGGTCGCACCTGTAGGGGGACATGGGACGGATTTCACAACAAAAAAGCATCTCTCATGATTCGATTCAGATAGAGGAGAGGAGGTAAATGCCAACAAATCTATTCTTTAGTACCGCCTGACGGAACGTATTTAATCCTCGCCTCAGATCGACACCGCAGGGTAGCTGAAGACGTCCGTTCTTAGACATTTAGTCGATCATTTGTTATGAAACAGAACTAGGAGTCCGTGCCCTTCAGGCCGGCTCAGGGGCACCTACCTCCAGATCGCCCAGGTTGGGTTTATTAGGCGCCGAAAAGTTACTGCCCTATCAGCCCCTCCAATCCGACCTACGGACATCATCCCACTGGCTCGCAAAATATAAATTGCGGATGGGAAAGGATAAGGAAAATCATTACCTACACAGAAGGACAATGTCAGTTCCAAATAACACTGATACTTTCGGAGCAACTTGGTCCGGAAATGTAAGTACGACTATAGCCCTTTCGACCAACGCCGACAGTCCTATTTGGACGCCGAGAGAGGCGACGGGTAGCCGAATGTAAAGCTCTCGGGTCGCTCTTGGCGGAATGCGCTGCGGGTCCTACCCTAAACCCTTACCACCACCAACTTCGTTAGGAGCCGTATAGATTACAGCTCCCGCAAAATTAGAGAGGAATCTGAGTTATTAGCTGAGGACCCCGCATTTTCTGCGACGGCGTAGCTGCAGTGACGTACGATATGAGTTCCCGACTGTGAGGGAGTCCCAGTCGTGACTCCCTACAACGGCTCCAGATATTGTTACTTATGGTCAATATGCCCCGACCGCCCATTGTCTCGAGTACAGTCTTCCCCAAAGTTAAGCTGTGCATTACCTTACCGTTTTAGGTCCAGCTGGTAGCACCGAATGCTGCGCAATCCGAGCCCCCGAAATAGACTACGTGTCCACGGTCAATTGTCATGGGTAGCAGAGCTCAAAGAGGAGAAACGTGCCCCGTAAACCTATTAGATCTCGGTTGATAAATATCAGGCCACAGCAGGCTGCCCGATGCTTGTTTGAACAACAACTTCGGGAGCCGCGGTCCTTGGTTCTCCCGATATTCGGCCGCACCGAACGGTACGCGTCATCGCGAGGTGCGTTCTCGCAGCAAGAAATATTTGTTGTTGTTGTCTTCCTTCCGCATAGGAAACCTTAAGCGGTACCTTTCTACGAAGTTGAACCCTAGAAGCACGTGTAACAATTTTTTTTACGCTACACCCGGATCTGCTTCCATCTGTTGATCATATGAGCCTAATGTGACTAATCTGTGCCGTCGATTGAAAATTCGTTCTGAACCTAATCACATGAATTAAAATTAGGGCGAGAATTGGCTCCTTTTGGGCCGTAATCCTTCAAAGGGTTAACCGAATTTAGCCTCCACGGTGACACAAACTCCCATAGGTAAGGCAAACCCAATAACGAGGAAGCCTTGCCCACAGCATGTTTGATAAATACCCTTAGGGTAATATCGCGTGCAATACTGAAGCCGCTCTTCTAGCATCCGTGTTTGACATACTATGACCTTGAAGCCTGCCGCAGCTTCTAGGTCATCCAAGTAGATCAAAACGCCATGTTGTGGATCCATGCATCTTCCCAGTGAACATGGATCTTAGTGTGACAGGCGAGGAGCGGCGAACACTATCGGTGTGGCAAGCTCGGGCCTTCGTACGTTGTGGAAGTATGCGAATAAGGAGACCGTAATGTATCAAGTTCTTAAGAGCCTTGGTACCGTTGCAATTCGGCATGTTCCTACAGAGACACTCCGTGTTTGTCATCCGTCATAGATCTATGGCGTAGTTAGCGCCTCTGAAGTAGTTGTCCATTCAGCAGGCATTGCTTAGGGAGTTTCTGGCGCTTGCCGCTCAAGATGCTCACGGGCCTAAGTAGCACGGCAACCTTTTGACAAAGCATTTTATAAACTGAGCATATTGGCCCGAAACTAATCCAGCAAAGGGTGAAGACCTGTCAGCGGGCCCAGAGTGTGAACGGTCTACTGCGCGGTACATAAGTGGCGTAATCCATCAACAAGACCTACACGACCTGAATGATTTCCAACAACTTTATATGCTTTTCCGCATCTCGAGAGTACCGGAATCTATGCAATCTCCCAAGGATCCGTAGATTTGAAATTCAATCCGACGGGGTAAGGTTGCCGCGCCGGTTAGCTAATGTGCGGATTTATAGTCTTTTTCCCAGAAGGCGTAGTTAGTTTCGCACCTAACTACGACACATACTTGGGTCGACTGTTGAAGGTGGTAAGTTGCGAGCAGTCCGCCGCTCTCACGCGCCGAACCACGTTCATATCGGCAAAGTTGCGCGATGACCTATAGGTGTGCAAAGCTCGTCCGACATTGGGATTGGATTCACGTACATACGTTAGTATCATGGGTAAGCTTCCATGTCAGCCTCGTGTATAGCACCGGTGCGCCGCGCGTTAAGGATTCTATGCCCAGCAAATGTGCCAACGTTGTGGGGAGAAAAGTGTAGTTGGATGCGATCGTGACATCGGCACACCGAAACTCTGCAGCCAGTCCCGCTAATCTCATTGGCACCGGGTAAGAGATTACCTTTGGTTAGGAATCGCGTGCGACGTACTGCACGAAAACAGTGCCTGAACCGAGGTGTTTACTTAGATGGTTCTAGACCCAGCATGTTCCTCACTGGAACCTGACGTCGGTACGTGATCCTCTATACCTCCTTTTCGGTATTGGCCTGGCAGCTACTCTAACTGTTTGGGCCGCGCCGATTTCTCGAGTCCACACGGCGAGGTCAGCAAAATTGCCAGTTAGTGGATGTTGGGATCTCAACGCATTACCATGAGAGTTCTTGGTTTACCCGTTAACATCGCTGCGCACGGTGTGAAAAGCCTGTTTCTTTGGCCCCCATCATCTTCGGCCCGCAGATCTCAGATCAATGATGTAAGGTTGCGGCGGCAAAGACTAGACTTGAGTCGTGAGATGGTGCTTTGCTGAGGCCGTCTCCTATAGCTTATTCTAGGACTTTCCGCAAACCACCCGACGTGCGGCTGTCCACGATCGGATTCCATTCTGTCTCGGAGC
ATACAGCACTAGATTTGCCGCTTGAAAAATGTTCCATAACCATGATTTCAACCCCATCTAGTCGGCAGGCACAGCTGAGAACAGCGAAGGGCGTCGTGAAGGGCATTGCCCGTAGTGTTTCAGACGTGCTAGAGACTAAATCAACTATCTGCACTCGTAGCCTGGCGTGTGAGATGTCACCACGATGTGCCTAGAGGAGTGATTATGAACATGTATTACCACGTCCGGGTGTCGACGGCTATATGGCTAACATTTCTTATGGCTAGACGTGCTTGGAAAGGTTCCCCAGCCTTCTGTTTCCCGGTGCTTTCCACGAGTCTGGAGTTCTGGTAATTAACTACATGGCGTTAACGCGGAGGTAACCCCCAGTCATTGCATTGCAGGTAGGGCTTAGGTGCAATATAATTCACCAAGGCGCGGATTCCTCACGATTGTTACGAAGACACCCGGAGGGTTTCAGTATGGCTTGAGAAGTGTACGTTTTTCCGGCCAGGGTGTAACTATAACCAACACATGTTTGGCCACGGGCTAAGTCGGTCCGCACGACTGATTTCCCCCGCCCATGTGTTTGGGAGCAATAAACTGCGTCTGCCAAGAGTAACAACTCGAGTAGAGAAGGGAAGTCTCAGACTATTTTGCAAATCAGACTGTAAGGCTCAACAGCCATACAGCTTGCCCTACTACTGAATACTAGCGTAGCGTGGCCACATAGGAAAGACTTCATGTCTTCTAATAACCTTTTACCTCCAACGTCCCCGCCGTCTTCACGCGGTCCAACGATGAGGAAACAACCACCCCTATCTTCCGCGGAGTGGTTCACACGACCCCCGGCGTTAACGCGCACGTTGTTGTCTTTCGGGACGGCACTACCCCCAAATGCCCAGACCCAGTGCTAGCGATATTCAAACGCCGTCCGGTAAGTCCTGACGTTTTTCAACTGGATGCACTGGCGACACGTAGTTCGCAAGGCGTCCATGAGAGGTTTTAACCGTCATGTTTCCGTATCACGTCTTATGTCTGTCTCTATTCTCAGCGAAATTCTCATCATAGGGCGGAGACTATCTGAAGGCCAGCGAATACAAGATTTAATATCAAATATAGCATGGGGGCCAACAGAGGCCCCCCTGGTGCTGACGAATTATCGTGATATTAGTACAGCTGTCTGCAATGCCATTTCGAAGGCTTTTTGTTCGTATCACTGCTCTATGCATAGCGGTCACTATGACCTCTCAGCTTGACTCACCCGAATGACCAATTGTGGTCCAGCACTCCCTCATCTTCCCCCATTAACGATACGTTGGGCACCATCGGTGTGAGCTACCCGTTACAGTCATAGAATCGTTCTTTGCGTTGTACGCGGCACGGAGGTGACCGGGAAAAGCGCCGCGAAGGCCCCGCACTGAATAAAGCTAGTATTAGCGTCTGTCAAAGTGTTTTGACACCTAATTCGCTTCCAAGTCCCAATATCTAATCTAGCCTGCTTTGGGCCAACATCTCATTGCGTTATGCTAATGAAGAGGGTGCGGGATCACATCCGCTCTTCTCTTCCTATACACAGCGGACATTCGGGTTGGACGTTTGGAGTGATAATTTATCGTTAGGGATAAGTATGTCGGCGCTTAGTAGTATAGCCCGCTGACCAGCGTTCGATTTCGAACCTTACTGGACATTCTCAATAACTACTGATCATGACGTTTTCCTCAGTTCCTAGCCTTGACAACTAGCCACAGTCAGCATGGTAGAGAGCGTTGAGCCGGGGATAGCCAGGCTATTAAGACAAAGACCCTCGGGCCCCTTAATGCGCGTCAAGTCTGACGGTTTGAGTGCGGAGCAGTAAGCGCTTTGGTATAACCGTGACGTAGCAGATCCATGCTTCGCCCGCTTCCACCTGAGAGATACTAGCCTCTTTCGCACTTTGTAGGATTACGGGCAGCGAAATATTTATCCTGTGCGGCGAGCCCGCTTCGGTTTCGAGCTCTATCAGTGCGCGGTTGGCACTCCAACGCACGATAACATATACCCGCCCACAAGGCCATGCAGGTTTAACCTCCTATTCTGATTGTACCTGGCTGACTTTACGGTACCCACCAGCGCAGGATTAATAGCCTAATTATGCTAACCGGTGCTAGTCTAACTGCTGTTACTAGTCCGCCCCAGCTACCCCACGGGTCAGTAACTGCACCAGCAAGCATGGTTCTCCTCCTGAAGTTGTACGTTCGAGAACCCCGTATCGAGTTGGTATATAAATTAAGGGTTGTCTAAAACAGAAGCCTATTCCGCTATCATCGGTGTAATAACTGATCGCGCCGTGGTTAAATGGAGGAGCACCCGCATGGATACATCGCTAGCGTCTTGTAACTCTCTGGGGGCCTAGTATGGAACGGAACAATGACATCATTGCTTACGGGGCCCGCACTTAGCTGTCGCGTATCGCAAATCATATGGCATGTCAGTCCCGACATCACGAAAATGACCCCATCTGAGGTGGTCGGGAGGCGAACAGTCGAATATGATGTATGCACCCGCAACTTAATGTTCAAAGGCGGGCGAAATGCCTTCTCCCGTCCGGACTATCCTGAGTGCTAGCCGCGAGTCTGTAAAGGTTGACGCAACCATATAGCACGCAGAAAAATCACTCTCACACCATGAGAACCATGGCGGCACGCTGTCTACTTTGTCTGACAGGCTACGGAAGGAATGGTACATACGTACAAACGGATGATATGATATCGGTCATTGCCTATTGTGACGCTACCCTACTGCATCACCCCCTTAGAATGCGTTGGACGCTCTATAGCAGATCCTCCATCCAGTGGAAGTCTCGTCGCCGTGGTTTGCCTTAACGACCGTTGGAGAGAGCAGGACAGAAATATCGCCCTTTTGAGCGCATTATTTGGAATCGAGGTAAGTCAGTGCGGCATAATCGCGCCTCGTGAGCGGAACAGTTTTTGATCCCACCCGCTAAATGCCAAGGTGCTGTAACCTGGGCGCGACACCAAAAGACCACGTGCTGTATGAAGCATGTGTTCTAGCGCACTCTCAACCGTTACCCCGAGAGTAAAATGTTAGTTGTAGGCCGATTCTGCAATGGTAATTGGCGGAGTGTCTAGGGAAATGTTTCGGTCATACTTAACCGGCTACCTCTTCCTCCCTCAGATTCGGTCTGAGATGAGATATACTGGGTGAGTTGAGTCGCCCTGTATCGTTGCGGCGCTCGTGGACCAGACAGACAGTTCCCGTTTATCTCTGCTTCTAGATGGAGGGTCGCCTCCGTGTTAACGCCGGCGAAGGTAGTCGCAGCTGAAGTTGTGATGCACAATCAGGTGAGCCTTTTAAGTATGGTCCTACGGACGTGAACAGCTGGGCCCAGTCATTTAGTACGGGGGGTTTACCTATAAGGATACGGTAAGAACGTCATCTATCCGTCCCACTGGAGTCCGAGGGGTTCGTGTCTACACGGATTACTTATCATGCACACACGTCTACGGTCATGCATAAAGTTGTGCAGCGCAGCAATCGGAGCGGAGTTACACCATCTCCCTATTAACAAGGCAC
TTATTAGTACTTACCCCGTTATAGAGCTCTCATCTTATCGATAGAGCGCAGTCCTAAGTATTGGCTCGAGTGATTCGCTCCTCAGCCCTTGATTGTAACTCCCCCGATTGCAGGTTGTATGGTGAGTAAAATCTCTGCGCCCTTCTGTTCGGATAAAGAACCCCGACCACTAATGCCCGCCTGCTTGTTGGGCGGTAAATGGGTAACGGAACATGGACTATGAGTGCGATGATGGTCAATAGAATTACCTTATTACGCAGTAAAAGGAATGACGCAGACAGGTATTTGTCGACGATTGCTTCGAACCTGGCAAAATGGGGAGGTATCCTGTCATGTTCATCTGTAAAACAACTCCTGCCTCTTCGTAGAGGACACACACTGTGGGCCTTTAGCCTTTAGCAGCCCATTGGGGCTTACCAGCTGTCGTCATGGGGTATCATTAAGATCCATGCGCCCCCGAAACTTACTGCAAAACAATATGGCTTAAAGGTAAAGGGACCATCAGGAGAATGCTTAAGAGCGACATATAGATACGTATTTAATTAATTTATGTTAACGCAACCATCTCGCAGGAGTCGCATAGCATATTGCCGGGTGATAGTTAATGCACTGTGCTTCCGTGTTTATATAAAATAAGCAGTAACCTCTGACAGGTTGAGACTCCAACAAGTGCTCCGGGTATTTACCTTCTACCATGGCGTTCTAATATCACGAAAGAGAAATTGTGTGTACCGATGCCAGGTGACCGCCCGCGTGCGCCAACGACGCAATCTAGAGCATCCACGCTGAATTGGGGAACTCTTGCCGTTCGTCGCATGGTGTACTTGGTACCACTCGATATGCCTGATTAGGTTTGGCCGTAGCACGTAAGGTAGTGACTTTCCATTCAAGCTAGCGAAGCGACACCACCACAGTGCCCGGTCAAAATAACCCACACCTGGCCAGCATAGAGGCTAAAATAGCTACAGTGCGCTAATCGAGTGTTTTTGCATCGGCTCGTGGCTGGTGGACTCGGGACAGCTTAGAACTAACTCTGGTGTACAAACGCGATCGTAGCTCTCGCGACTTACTCACCGGAGTAGGTTAGATGGACAAGACCTAACCCGAAGCCTAAATCGCCCTGAGTGTTAGCCGCCATTCAATTCTATGGTTTATCGGGGGCGTCTATGGCTGCGACAGTATGGAGGCCCGTTATGGGCACCCGAGTATCGTACCATAGTAATCCCATATTCCTCTTCGAGCGACTATTGGATCAACATACCTACAGGGTAGTATGAATGTTCTTGATTACAGAAACCATGGAATCGGCGCATTCTATGTTTCACTTCCGAATAACAGTGAGCAAGGCATGCCCTTGACAAGGATCATCCCGACAGCAAGCCGATCGGGCCCTAGAGCCCGACCCCCAAACAGAACACCGGCCACGTAGTTGCTGGGACTAAACAAAGGTGTGTTTCCATAAAAGGAAATCTTCAAGTGTATTGTTGAGTCGTAACGCTTATATTTATGGCCCAATGGGCGTTGCGAGCACAGTAGCAGGCCTAGATGAATGCCTAGGCCACGATCGGGGGGAGGCTCATTGAACGTACTGCCATACCAAGCCCCCGTATGCTATGGCAGGAGGGGTTCTCTTCGTATAGAGCGAGGGTCTCTACGCCAAGCAGCATTCCCGTGTTGGGTGGCCAATGGGGCTCACTAGAAACTCGGTTTTTTTAGCGAAGGAATGAGCAAACTCGTGAAAGGTGGTACACACCAGTTGCGGCCGATTTGTTGTAGCAACAAGGTTTGAAGAATTGAGTAGATGGGCCAATTTACCTCCTATTTAGCGAGTGAGATGGCGCATGTTTATTCAGACTCCATGTGGGGTAGAGGCTAATCGTTTAGTAGCAATAACCCCGCGGGGCAAGAGACCGTAATAACTTGAATCTGTGGTAGCTATGAATATGTGCTTCGCCCTAAGTGTTATGTAACAAGAGTGATCCAGGGGCTCAGATCACACTTAGTACGATCCGCTACTGAAATGCGGCCGCGGGCTTGCACGCTGGACATAAGTCGGATAATCAATTGCCTACGACAGGTTCAGCCATAAGGCTTGGCTCCTAACACACTCATGATGTCTGGCTTTTACTCGTGCCCGGACATAAACGTATGCTCAAACGCGAGACAGGGGAGGGTCAGCACCGTTTAGATCTATAAGGCCTACCGGTAATATGGATCGACAACAAACAGATGCTATAGGGATACCTACTCCTTTGGACCCACATGTAGATGAAGGCAAACACGCAGAGCAAAGGAGAGTAGTCCACCCGGTATAAGTTTGTGCTTTGAATTCTGGCTACGCAGACTTGCACTCTGTCCCGGCATTCACTATACTTCTCCGGAAGTCCTTTAAGAAATGTCCGCGCTCATGTGGTTCCCGTTGCTCAGGGGCCAACTCAAGTAGATCTTTAAGGCGCAGTCGACCACAGGCTACTAGATACGAGTTATACTTATCCGGACATCTGGCTAAATACTTGGATACGATACTTCCCCAGTCGTGAGAACGAAGCTAATACAGATCGAATTTCGATGGTTCAGGCAGGCAGTTCTCAGGAGGCAAGGTGTTAAATAGTTTCGGAGGCTCTTTCGTACGATCAGGGTCTACTACCCTAGGGCATTTTGACTTTGGATTAAATATGCAAAATGCAAGGCCGATTGTGATCAGTACTGATACTCCAACTGGACCACCTTCAGACCCTTCGAGGGGACCTAGACGACGGGAACCCTTCCAGCGGGTGATACCAGTTAGAGCAAGTCACAAACACGATTCAGCCCCCGGGGTTTATGACGTACCATGCGAGTAATAATGCACGTATACGGAGCTCTTCCACCGAGCGATGGCATTTCGGGGCGAGGTAGTTGTCTTTCATTGGCATCGCACAACCCCCATCCTCTTAATTGGCATCGTCTCCAGCTGGAAAGAATTTGAGTGAGCATGTCGCCCCTATTATTCCGTTGCCAATAAAGTGTCTCAACTTTTGGCGAAGGTTTTAACGCATACAAGGAGAAGCCGCGAGACGTCTGTACCGCTGATCTGGACGCAAAGTGCTCGGACTGCCGCTGAGTTATCCTGGACGCCATGATTAGAGCCGTCGTCACTACCTGCATACATGGGCCGATAGAGTACTGCAACCAACAACTCACTTAAGCTCCACAACGGCTGGACACTTCCGAGAGCGGTCTTACACAAACGTTAGGTCCTGGGCCGCCGACCTTACCGCTAGTTAGTGAGAGCCAGTTAAAATTATGAACGCTCGGAACCTTCCCAACAGTGGCCGCAGCCTTCCTTGACGCCTAGCACATCTGGTTTATACTCGGGTATGCCGTAGATCGGTAACCTAGGGAACGACCCTGTGGGTTTAACACCCGAGTGCGTAATCAAGCCTAGAGGCCATCTCAACTCGAGAGGTCTCCTGACAAAGAGGCGCCCGATGAATCATCCAGAGGCGTCTGGCGGTCCTACGAGAGTGGCTTTGGATGCCTGCCCCTTGGATGGATCTGTCTTTAATCGGCGCC
AATACCTAGCACTGCTAGGCTCCAGACTGTGTTTACATGCCGTAACCCTGATACTCGCAGAAACGTTGCTGGAAATTCCTAGCAGCTGAAACCATTCCCCGTAACGTACTAGTACGCTAAGAGAGAGTCTCTCCTGGCCCTGATGAGTGTGTTCTCATCTGGGGCACGATACAAGAATCGGAACGAACGCAATGCCGAAGTCCCTTGTACCTTAATTTGGGCGACGCAGATAGACCCAAAGATCGCGGACTACGGAAACTAGCATAGGACCTGTGTCGAGAAGGTTCGAGCAGGTAGTGACACGCAGCGCGGTGGCCGGCGGGGTGGCACATTGCGGGTCAATACTGGTAGTAGCCACTCTTTGGACATAGCGGCGGACCAGCGCCTAGAATGTCTCATTCTCATTTTGTTCCGTGGCACGTTACGTAATGACGGCCCGCCAGCACCTGTGTATGGACTTGTAGCTCGGGCCTCTGGTCCTGGCACGACAAGGCACCAGCCAGTAATCTCTCCTAAGGCGCTAGCGTGCATAGCGCGTCTGCCTACCGCCAGAGAACGCGTCATCTGCAAGACGTCCCAGCGTAGTGAATTGTAACTGCAAGCGTTCTCTTACGGTCATAGTGCCGATTTTGAGCAGTAATGGAAGCAGCAAAATGCCGCCCAAGCGATTCGCAAACTTCTAACAGAGCTACAGCCGGACACGACGCGGTGGTGCTCGCGGTTGGTGATCTTATGATATTAACGCCCATAGCGGCCATCTTAATCGACACCATGTTCGTTTTGGCAGGCCTTGTGGTAAACACGTGCTAGTGGCACCACCCATGCCCGTGCCCATACATCCAAACCGAGAGAAAGCCTATTTAAGCGAAAACCACAACTTCGAGGTTTCACCCCCTGCCATTGATAAAGCGAGGAGTACCCCCGATGCCGGGAAGCGTCCGCACCCATTTCTTTCGTTCTGGAATCCTCGGGCGACTTCTCGAAGATACTGTGCTCACGACCTGGAGTATCATGAACAATCGGAGGAAAATGAGTAATTGTCGAGTCGTTGTTAGACGGCACTTCCGTCCGGCCCAACTGTTCTCGGATACGTGTCCCGTGGTCAATGCTCTAAACCGGCTGCCGGCGACTCAGTTCACTGAGACAAATTCTGATGCTTTCGAAGCAAGGATGCGCCCAGAGCAGAGCTGCCCAGATGAGGTTAAGAACGTAACTATAATCGATCAGCCATTCGGCTTAAGGGGCCCCGGCGAAACGCGAAACACTTGGCACATGGACGCTTCACGCGCAACAGTAGTTGTCTCTTTCGTGAGCCACCGTAGCAGCTAGAAAGGCCTATCCAGTGATGCTTTATGACTGAGTGTCGAATCTAGGTATAGCATAGACTGGCTGATCGGGCGGGTCGGCCCACCCGTCTCGGTCGAGCGGTTCTGACTTTGGGTGGCTGTGTGAACCCAACTGCAGATGGAGTTGAATGGGTACACCCTATGCGAGGCCTCGTCTTTACACCAAATCGGGGCCCTGTGAAGTGCCACTCTTTTCCAGCCGGCAGCCGCTCAGTCTGATTTTGCTTGTACATGTCGTGTGCGAACGTTCCGGGAGGCTTCCGTGTTCCAAATACCGTGTTCTCATATTCGGTCCATCTACCGACGGAGAGTTGGGATGCCCGGGCCCGGAAATATAATTTAAACTCGTGGCCAAGAATTTAGCATGTTGTAAACATGAGAGACAGGGCCGGGCTAAAACATTACCCCTGAGTAATGTAGAGCCACAACTGAACATAACATTGGGATCTAACGCACGCAATCAGTGTAGCTTCAGCCCACCCTCTAAATTTCCCCCGGACAACTGGATTATCACCTGCGTCACGCGATAATTGCTCGCATCTCACCAACACACTTCGACAAATCTGGAGTCTCCCTGGTCCGTACGTCCAAAACCGTTTAAATGGGCGGGTGTGTCGTGAACCAATCTCCTCTTCCATTTGTCACATACTGGCGATGACATCCTTTTACTTGAATTATTCATCCGGGCACCAGCCGCTTTCCCTACGATCCCCGACACTCGGGGCTTCGGGAGTTGCCCGCCAAAAAACCGACAAACCAAACTATACAATCAATCCCATCTAGATGTAGGGGACTGAGGCTCTAAGCTATGCGCCTACTATACTTTGTAGGTATCAAACTACGCTTGAAGATAGTTGATAAGGAAGCGAATTGATCGAGTACCGTATCTTCAGTCCGACTCCCGTTCGAACGCAGCACGCTAACATGGTCCACTGGCATTCTTACTAAATACCTAGTTCACTTCTACATGAGGAGTGTCTGGGCCGGACTCACCTTTGATTAGATAACTGAAG diff --git a/public/biopet-core/src/test/resources/log4j.properties b/public/biopet-core/src/test/resources/log4j.properties index 501af67582a546db584c8538b28cb6f9e07f1692..3dd3a7d603b3c239b2662eb543da126b6f23a672 100644 --- a/public/biopet-core/src/test/resources/log4j.properties +++ b/public/biopet-core/src/test/resources/log4j.properties @@ -15,7 +15,7 @@ # # Set root logger level to DEBUG and its only appender to A1. -log4j.rootLogger=ERROR, A1 +log4j.rootLogger=FATAL, A1 # A1 is set to be a ConsoleAppender. 
log4j.appender.A1=org.apache.log4j.ConsoleAppender diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..5bb2948830fba23db63cac137db92f584b81fec0 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/MultiSampleQScriptTest.scala @@ -0,0 +1,208 @@ +package nl.lumc.sasc.biopet.core + +import java.io.File + +import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender +import nl.lumc.sasc.biopet.core.extensions.Md5sum +import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.utils.config.Config +import org.broadinstitute.gatk.queue.QScript +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.collection.mutable.ListBuffer + +/** + * Created by pjvan_thof on 12/29/15. + */ +class MultiSampleQScriptTest extends TestNGSuite with Matchers { + import MultiSampleQScriptTest._ + + @Test + def testDefault: Unit = { + + val script = MultiSampleQScriptTest(sample1 :: sample2 :: sample3 :: Nil) + script.outputDir = new File("./").getAbsoluteFile + script.init() + script.biopetScript() + + script.functions.size shouldBe 5 + + script.samples.foreach { + case (sampleId, sample) => + sample.gender shouldBe Gender.Unknown + sample.father shouldBe None + sample.mother shouldBe None + sample.summaryFiles shouldBe Map() + sample.summaryStats shouldBe Map() + sample.summarySettings shouldBe Map() + sample.sampleDir shouldBe new File(script.outputDir, "samples" + File.separator + sampleId) + sample.createFile("bla.txt") shouldBe new File(sample.sampleDir, s"$sampleId.bla.txt") + + sample.libraries.foreach { + case (libId, library) => + library.libDir shouldBe new File(sample.sampleDir, s"lib_$libId") + library.createFile("bla.txt") shouldBe new File(library.libDir, s"$sampleId-$libId.bla.txt") + library.summaryFiles shouldBe Map() + library.summaryStats shouldBe Map() + library.summarySettings shouldBe Map() + } + } + } + + @Test + def testTrio: Unit = { + val script = MultiSampleQScriptTest(child :: father :: mother :: Nil) + script.init() + script.biopetScript() + + script.functions.size shouldBe 5 + + script.samples("child").gender shouldBe Gender.Male + script.samples("father").gender shouldBe Gender.Male + script.samples("mother").gender shouldBe Gender.Female + script.samples("child").father shouldBe Some("father") + script.samples("child").mother shouldBe Some("mother") + } + + @Test + def testGroups: Unit = { + val script = MultiSampleQScriptTest(sample1 :: sample2 :: sample3 :: Nil) + script.init() + script.biopetScript() + + script.functions.size shouldBe 5 + + script.samples("sample1").sampleGroups shouldBe List("1") + script.samples("sample1").libraries("lib1").libGroups should not be List("1") + script.samples("sample2").sampleGroups shouldBe List("2") + script.samples("sample2").libraries("lib1").libGroups shouldBe List("3") + + script.samples("sample3").sampleGroups shouldBe Nil + } + + @Test + def testOnlySamples: Unit = { + val script = MultiSampleQScriptTest(sample1 :: sample2 :: sample3 :: Nil, List("sample1")) + script.init() + script.biopetScript() + + script.functions.size shouldBe 1 + } +} + +object MultiSampleQScriptTest { + val sample1 = Map("samples" -> Map("sample1" -> Map( + "tags" -> Map( + "gender" -> "blablablablabla", + "groups" -> List("1") + ), + "libraries" -> Map( + 
"lib1" -> Map("test" -> "1-1") + ))) + ) + + val sample2 = Map("samples" -> Map("sample2" -> Map( + "tags" -> Map( + "groups" -> List("2") + ), + "libraries" -> Map( + "lib1" -> Map("test" -> "2-1", "tags" -> Map( + "groups" -> List("3") + )), + "lib2" -> Map("test" -> "2-2") + )))) + + val sample3 = Map("samples" -> Map("sample3" -> Map("libraries" -> Map( + "lib1" -> Map("test" -> "3-1"), + "lib2" -> Map("test" -> "3-2"), + "lib3" -> Map("test" -> "3-3") + )))) + + val child = Map("samples" -> Map("child" -> Map("tags" -> Map( + "gender" -> "male", "father" -> "father", "mother" -> "mother")))) + val father = Map("samples" -> Map("father" -> Map("tags" -> Map("gender" -> "male")))) + val mother = Map("samples" -> Map("mother" -> Map("tags" -> Map("gender" -> "female")))) + + def apply(configs: List[Map[String, Any]], only: List[String] = Nil) = { + new QScript with MultiSampleQScript { qscript => + + override val onlySamples = only + + var buffer = new ListBuffer[String]() + + override def globalConfig = new Config(configs + .foldLeft(Map[String, Any]()) { case (a, b) => ConfigUtils.mergeMaps(a, b) }) + + val root = null + class Sample(id: String) extends AbstractSample(id) { + class Library(id: String) extends AbstractLibrary(id) { + /** Function that add library jobs */ + protected def addJobs(): Unit = { + buffer += config("test") + } + + /** Must return files to store into summary */ + def summaryFiles: Map[String, File] = Map() + + /** Must returns stats to store into summary */ + def summaryStats = Map() + } + + /** + * Factory method for Library class + * @param id SampleId + * @return Sample class + */ + def makeLibrary(id: String): Library = new Library(id) + + /** Function to add sample jobs */ + protected def addJobs(): Unit = { + buffer += s"$sampleId" + addPerLibJobs() + add(new Md5sum(qscript)) + } + + /** Must return files to store into summary */ + def summaryFiles: Map[String, File] = Map() + + /** Must returns stats to store into summary */ + def summaryStats = Map() + } + + /** + * Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given. 
+ */ + def addMultiSampleJobs(): Unit = { + add(new Md5sum(qscript)) + } + + /** + * Factory method for Sample class + * @param id SampleId + * @return Sample class + */ + def makeSample(id: String): Sample = new Sample(id) + + /** Must return a map with used settings for this pipeline */ + def summarySettings: Map[String, Any] = Map() + + /** Files to put in the summary for this pipeline */ + def summaryFiles: Map[String, File] = Map() + + /** Name of summary output file */ + def summaryFile: File = null + + /** Init for pipeline */ + def init(): Unit = { + } + + /** Pipeline itself */ + def biopetScript(): Unit = { + addSamplesJobs() + addSummaryJobs() + } + } + } +} \ No newline at end of file diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..79741a2c2eb73a39cce67d8ad48b5680ba939163 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/ReferenceTest.scala @@ -0,0 +1,79 @@ +package nl.lumc.sasc.biopet.core + +import java.nio.file.Paths + +import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging } +import nl.lumc.sasc.biopet.utils.config.{ Configurable, Config } +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.mockito.Mockito._ +import org.testng.annotations.Test + +/** + * Created by pjvan_thof on 12/30/15. + */ +class ReferenceTest extends TestNGSuite with Matchers with MockitoSugar { + + import ReferenceTest._ + + @Test + def testDefault: Unit = { + Logging.errors.clear() + make(config :: testReferenceNoIndex :: Nil).referenceFasta() + Logging.checkErrors(true) + + make(config :: testReference :: Nil).referenceFasta() + Logging.checkErrors(true) + } + + @Test + def testIndexes: Unit = { + make(config :: testReferenceNoIndex :: Nil, fai = true, dict = true).referenceFasta() + + intercept[IllegalStateException] { + Logging.checkErrors(true) + } + + val a = make(config :: testReference :: Nil, fai = true, dict = true) + a.referenceFasta() + a.referenceSummary shouldBe Map( + "contigs" -> Map("chrQ" -> Map("md5" -> Some("94445ec460a68206ae9781f71697d3db"), "length" -> 16571)), + "species" -> "test_species", + "name" -> "test_genome") + Logging.checkErrors(true) + } + +} + +object ReferenceTest { + + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val config = Map("species" -> "test_species", "reference_name" -> "test_genome") + + val testReferenceNoIndex = Map( + "references" -> Map( + "test_species" -> Map( + "test_genome" -> Map( + "reference_fasta" -> resourcePath("/fake_chrQ_no_index.fa"))))) + + val testReference = Map( + "references" -> Map( + "test_species" -> Map( + "test_genome" -> Map( + "reference_fasta" -> resourcePath("/fake_chrQ.fa"))))) + + def make(configs: List[Map[String, Any]], + r: Configurable = null, + fai: Boolean = false, + dict: Boolean = false) = new Reference { + val root = r + override def globalConfig = new Config(configs + .foldLeft(Map[String, Any]()) { case (a, b) => ConfigUtils.mergeMaps(a, b) }) + override def dictRequired = if (dict) true else super.dictRequired + override def faiRequired = if (fai) true else super.faiRequired + } +} \ No newline at end of file diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala 
b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..1be487ff6d1309685d004d4b2f8f8b5523a93d75 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummarizableTest.scala @@ -0,0 +1,32 @@ +package nl.lumc.sasc.biopet.core.summary + +import java.io.File + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvanthof on 14/01/16. + */ +class SummarizableTest extends TestNGSuite with Matchers { + @Test + def testDefaultMerge: Unit = { + val summarizable = new Summarizable { + def summaryFiles: Map[String, File] = ??? + def summaryStats: Any = ??? + } + intercept[IllegalStateException] { + summarizable.resolveSummaryConflict("1", "1", "key") + } + } + + def testOverrideMerge: Unit = { + val summarizable = new Summarizable { + def summaryFiles: Map[String, File] = ??? + def summaryStats: Any = ??? + override def resolveSummaryConflict(v1: Any, v2: Any, key: String) = v1 + } + summarizable.resolveSummaryConflict("1", "1", "key") shouldBe "1" + } +} diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..a53fe068f9537cdce91a75f45ef9393631377d0d --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScriptTest.scala @@ -0,0 +1,126 @@ +package nl.lumc.sasc.biopet.core.summary + +import java.io.File + +import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile +import nl.lumc.sasc.biopet.core.extensions.Md5sum +import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable } +import org.broadinstitute.gatk.queue.QScript +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test +import SummaryQScriptTest._ + +/** + * Created by pjvanthof on 14/01/16. 
+ */ +class SummaryQScriptTest extends TestNGSuite with Matchers { + @Test + def testNoJobs: Unit = { + SummaryQScript.md5sumCache.clear() + val script = makeQscript() + script.addSummaryJobs() + SummaryQScript.md5sumCache shouldBe empty + } + + @Test + def testFiles: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript(files = Map("file" -> file)) + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 2 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + .exists(_.cmdLine.contains(" || "))) + } + + @Test + def testDuplicateFiles: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript(files = Map("file" -> file, "file2" -> file)) + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 2 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + .exists(_.cmdLine.contains(" || "))) + } + + @Test + def testAddSummarizable: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript() + script.addSummarizable(makeSummarizable(files = Map("file" -> file, "file2" -> file)), "test") + script.summarizables.size shouldBe 1 + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 2 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + .exists(_.cmdLine.contains(" || "))) + } + + @Test + def testInputFile: Unit = { + SummaryQScript.md5sumCache.clear() + val file = new File(s".${File.separator}bla") + val script = makeQscript() + script.addSummarizable(makeSummarizable(files = Map("file" -> file, "file2" -> file)), "test") + script.summarizables.size shouldBe 1 + script.inputFiles :+= InputFile(file, Some("md5sum")) + script.inputFiles :+= InputFile(file, None) + script.addSummaryJobs() + SummaryQScript.md5sumCache should not be empty + SummaryQScript.md5sumCache.toMap shouldBe Map( + new File(s".${File.separator}bla") -> new File(s".${File.separator}bla.md5")) + script.functions.size shouldBe 3 + assert(script.functions + .filter(_.isInstanceOf[Md5sum]) + .map(_.asInstanceOf[Md5sum]) + .exists(_.cmdLine.contains(" || "))) + } + + @Test + def testAddQscript: Unit = { + SummaryQScript.md5sumCache.clear() + val script = makeQscript() + script.addSummaryQScript(script) + script.summaryQScripts.head shouldBe script + } +} + +object SummaryQScriptTest { + def makeQscript(settings: Map[String, Any] = Map(), + files: Map[String, File] = Map(), + c: Map[String, Any] = Map()) = + new SummaryQScript with QScript { + outputDir = new File(".") + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = ??? + def biopetScript(): Unit = ??? 
+ def root: Configurable = null + } + + def makeSummarizable(files: Map[String, File] = Map(), stats: Map[String, Any] = Map()) = new Summarizable { + def summaryFiles: Map[String, File] = files + def summaryStats: Any = stats + } +} \ No newline at end of file diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..4bb196c162ddb93c85b19d1364d34789fb77e8b9 --- /dev/null +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/summary/WriteSummaryTest.scala @@ -0,0 +1,346 @@ +package nl.lumc.sasc.biopet.core.summary + +import java.io.{ PrintWriter, File } + +import com.google.common.io.Files +import nl.lumc.sasc.biopet.core._ +import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable } +import nl.lumc.sasc.biopet.utils.summary.Summary +import org.broadinstitute.gatk.queue.function.CommandLineFunction +import org.broadinstitute.gatk.queue.{ QScript, QSettings } +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import WriteSummaryTest._ +import org.testng.annotations.Test + +import scala.util.matching.Regex + +/** + * Created by pjvanthof on 15/01/16. + */ +class WriteSummaryTest extends TestNGSuite with Matchers { + + @Test + def testWrongRoot(): Unit = { + intercept[IllegalArgumentException] { + makeWriter(null) + } + } + + /** This is a basic summary test, no matter the content this should always be true */ + def basicSummaryTest(summary: Summary, + name: String, + sampleId: Option[String] = None, + libId: Option[String] = None): Unit = { + summary.getValue(sampleId, libId, name) should not be None + summary.getValue(sampleId, libId, name, "files", "pipeline").get shouldBe a[Map[_, _]] + summary.getValue(sampleId, libId, name, "settings").get shouldBe a[Map[_, _]] + summary.getValue(sampleId, libId, name, "executables").get shouldBe a[Map[_, _]] + + summary.getValue("meta") should not be None + summary.getValue("meta", "pipeline_name") shouldBe Some(name) + summary.getValue("meta", "last_commit_hash") shouldBe Some(nl.lumc.sasc.biopet.LastCommitHash) + summary.getValue("meta", "pipeline_version") shouldBe Some(nl.lumc.sasc.biopet.Version) + summary.getValue("meta", "output_dir") shouldBe Some(new File(".").getAbsolutePath) + summary.getValue("meta", "summary_creation") should not be None + } + + def createFakeCheckSum(file: File): Unit = { + file.getParentFile.mkdirs() + val writer = new PrintWriter(file) + writer.println("checksum file") + writer.close() + file.deleteOnExit() + } + + @Test + def testEmpty(): Unit = { + val qscript = makeQscript(name = "test") + val writer = makeWriter(qscript) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + } + + @Test + def testMergeQscript(): Unit = { + val qscript = makeQscript(name = "test") + val qscript2 = makeQscript(name = "test2") + qscript.addSummaryQScript(qscript2) + val summaryWriter = new PrintWriter(qscript2.summaryFile) + summaryWriter.println("""{ "test2": "value" }""") + summaryWriter.close() + val writer = makeWriter(qscript) + writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test2") shouldBe Some("value") + } + + @Test + def testSingleJob(): Unit = { + val qscript = makeQscript("test") + val writer = 
makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + } + + @Test + def testSingleJavaJob(): Unit = { + val qscript = makeQscript("test") + val writer = makeWriter(qscript) + val summarizable = makeJavaCommand(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.add(summarizable) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + summary.getValue("test", "executables", "java_command", "version") shouldBe Some("test version") + } + + @Test + def testVersion(): Unit = { + val qscript = makeQscript("test") + val writer = makeWriter(qscript) + val summarizable = makeVersionSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.add(summarizable) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + summary.getValue("test", "executables", "version_command", "version") shouldBe Some("test version") + } + + @Test + def testSampleLibrary(): Unit = { + val qscript = makeSampleLibraryQscript("test", s = Some("sampleName"), l = Some("libName")) + val writer = makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test", sampleId = Some("sampleName"), libId = Some("libName")) + summary.getValue(Some("sampleName"), Some("libName"), "test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue(Some("sampleName"), Some("libName"), "test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + } + + @Test + def testSample(): Unit = { + val qscript = makeSampleLibraryQscript("test", s = Some("sampleName")) + val writer = makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + 
basicSummaryTest(summary, "test", sampleId = Some("sampleName"), libId = None) + summary.getValue(Some("sampleName"), None, "test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue(Some("sampleName"), None, "test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + } + + @Test + def testMultisampleQscript(): Unit = { + val qscript = makeMultisampleQscript("test", multisampleConfig) + val writer = makeWriter(qscript) + val summarizable = makeSummarizable(files = Map("file_1" -> new File("bla")), stats = Map("key" -> "value")) + qscript.addSummarizable(summarizable, "tool_1") + qscript.addSummaryJobs() + createFakeCheckSum(SummaryQScript.md5sumCache(new File("bla"))) + writer.freezeFieldValues() + writer.deps shouldBe empty + writer.run() + + val summary = new Summary(writer.out) + basicSummaryTest(summary, "test") + summary.getValue("test", "stats", "tool_1", "key") shouldBe Some("value") + summary.getValue("test", "files", "tool_1", "file_1", "md5") shouldBe Some("checksum") + + summary.getValue(Some("sampleName"), Some("libName"), "test") should not be None + } + +} + +object WriteSummaryTest { + def makeWriter(root: Configurable, c: Map[String, Any] = Map()) = new WriteSummary(root) { + override def globalConfig = new Config(c) + override def outputs = Seq() + override def inputs = Seq() + qSettings = new QSettings { + jobName = "test" + jobTempDir = Files.createTempDir() + jobTempDir.deleteOnExit() + jobPriority = Some(1) + } + override def absoluteCommandDirectory() {} + } + + def makeQscript(name: String, + settings: Map[String, Any] = Map(), + files: Map[String, File] = Map(), + c: Map[String, Any] = Map()) = + new SummaryQScript with QScript { + summaryName = name + outputDir = new File(".").getAbsoluteFile + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = {} + def biopetScript(): Unit = {} + def root: Configurable = null + } + + def makeSampleLibraryQscript(name: String, + settings: Map[String, Any] = Map(), + files: Map[String, File] = Map(), + c: Map[String, Any] = Map(), + s: Option[String] = None, + l: Option[String] = None) = + new SummaryQScript with QScript with SampleLibraryTag { + sampleId = s + libId = l + summaryName = "test" + outputDir = new File(".").getAbsoluteFile + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = {} + def biopetScript(): Unit = {} + def root: Configurable = null + } + + def makeMultisampleQscript(name: String, + c: Map[String, Any], + settings: Map[String, Any] = Map(), + files: Map[String, File] = Map()) = + new MultiSampleQScript with QScript { + summaryName = "test" + outputDir = new File(".").getAbsoluteFile + override def globalConfig = new Config(c) + def summarySettings: Map[String, Any] = settings + def summaryFiles: Map[String, File] = files + val tempFile = File.createTempFile("summary", ".json") + tempFile.deleteOnExit() + def summaryFile: File = tempFile + def init(): Unit = {} + def biopetScript(): Unit = {} + def root: Configurable = null + + class Sample(id: String) extends AbstractSample(id) { + class Library(id: String) extends AbstractLibrary(id) { 
+ protected def addJobs(): Unit = {} + def summaryFiles: Map[String, File] = files + def summaryStats: Any = Map() + } + + def makeLibrary(id: String): Library = new Library(id) + protected def addJobs(): Unit = {} + def summaryFiles: Map[String, File] = files + def summaryStats: Any = Map() + } + + def makeSample(id: String): Sample = new Sample(id) + + def addMultiSampleJobs(): Unit = {} + } + + val multisampleConfig = Map("samples" -> Map("sampleName" -> Map("libraries" -> Map("libName" -> Map())))) + + def makeSummarizable(files: Map[String, File] = Map(), stats: Map[String, Any] = Map()) = new Summarizable { + def summaryFiles: Map[String, File] = files + def summaryStats: Any = stats + } + + def makeJavaCommand(files: Map[String, File] = Map(), + stats: Map[String, Any] = Map(), + c: Map[String, Any] = Map()) = new BiopetJavaCommandLineFunction with Summarizable with Version { + override def globalConfig = new Config(c) + override def configName = "java_command" + def root: Configurable = null + def summaryStats: Map[String, Any] = stats + def summaryFiles: Map[String, File] = files + + def versionCommand: String = "echo test version" + def versionRegex: Regex = """(.*)""".r + override def getVersion = Some("test version") + + override def outputs = Seq() + override def inputs = Seq() + qSettings = new QSettings { + jobName = "test" + jobTempDir = Files.createTempDir() + jobTempDir.deleteOnExit() + jobPriority = Some(1) + } + override def absoluteCommandDirectory() {} + } + + def makeVersionSummarizable(files: Map[String, File] = Map(), + stats: Map[String, Any] = Map(), + c: Map[String, Any] = Map()) = + new CommandLineFunction with Configurable with Summarizable with Version { + override def globalConfig = new Config(c) + override def configName = "version_command" + def root: Configurable = null + + def summaryFiles: Map[String, File] = files + def summaryStats: Any = stats + + def versionCommand: String = "echo test version" + def versionRegex: Regex = """(.*)""".r + override def getVersion = Some("test version") + + def commandLine: String = "" + + override def outputs = Seq() + override def inputs = Seq() + qSettings = new QSettings { + jobName = "test" + jobTempDir = Files.createTempDir() + jobTempDir.deleteOnExit() + jobPriority = Some(1) + } + override def absoluteCommandDirectory() {} + } + +} \ No newline at end of file diff --git a/public/biopet-extensions/pom.xml b/public/biopet-extensions/pom.xml index d4483082548a15b58eec526eff896f18d9a558db..26cc102b5df164ac6fc4df51fe89eb33b87ffe27 100644 --- a/public/biopet-extensions/pom.xml +++ b/public/biopet-extensions/pom.xml @@ -46,6 +46,13 @@ <version>2.2.1</version> <scope>test</scope> </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-all</artifactId> + <version>1.9.5</version> + <scope>test</scope> + </dependency> + </dependencies> </project> \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala index 695b5ca3ae5e6943a4da481714a6bf1c7aa7c855..b06cb1bbe9dbafb3974f572b9f31faffef1125df 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cat.scala @@ -31,12 +31,14 @@ class Cat(val root: Configurable) extends BiopetCommandLineFunction { @Output(doc = "Unzipped file", required = true) var output: File = _ + var appending = false + 
executable = config("exe", default = "cat") /** return commandline to execute */ def cmdLine = required(executable) + (if (inputAsStdin) "" else repeat(input)) + - (if (outputAsStsout) "" else " > " + required(output)) + (if (outputAsStsout) "" else (if (appending) " >> " else " > ") + required(output)) } /** diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Curl.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Curl.scala new file mode 100644 index 0000000000000000000000000000000000000000..62d87a672dd8dc89b0c9adcb78fc7a20aa002982 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Curl.scala @@ -0,0 +1,23 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Output + +/** + * Created by pjvan_thof on 8/11/15. + */ +class Curl(val root: Configurable) extends BiopetCommandLineFunction with Version { + @Output + var output: File = _ + + var url: String = _ + + executable = config("exe", default = "curl") + def versionCommand = executable + " --version" + def versionRegex = """curl (\w+\.\w+\.\w+) .*""".r + + def cmdLine: String = required(executable) + required(url) + " > " + required(output) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala index 6e06894f916a5206bcac748d924eb8f3a9f51c53..80743e00d167dd7a37ae9ec5d208cf0a83c7be97 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala @@ -26,18 +26,19 @@ import scala.collection.mutable import scala.io.Source /** - * Extension for cutadept - * Based on version 1.5 + * Extension for cutadapt + * Started with version 1.5 + * Updated to version 1.9 (18-01-2016 by wyleung) */ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable with Version { @Input(doc = "Input fastq file") - var fastq_input: File = _ + var fastqInput: File = _ @Output - var fastq_output: File = _ + var fastqOutput: File = _ @Output(doc = "Output statistics file") - var stats_output: File = _ + var statsOutput: File = _ executable = config("exe", default = "cutadapt") def versionCommand = executable + " --version" @@ -46,28 +47,121 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su /** Name of the key containing clipped adapters information in the summary stats. 
*/ def adaptersStatsName = "adapters" - var default_clip_mode: String = config("default_clip_mode", default = "3") - var opt_adapter: Set[String] = config("adapter", default = Nil) - var opt_anywhere: Set[String] = config("anywhere", default = Nil) - var opt_front: Set[String] = config("front", default = Nil) - - var opt_discard: Boolean = config("discard", default = false) - var opt_minimum_length: Int = config("minimum_length", 1) - var opt_maximum_length: Option[Int] = config("maximum_length") + var defaultClipMode: String = config("default_clip_mode", default = "3") + var adapter: Set[String] = config("adapter", default = Nil) + var anywhere: Set[String] = config("anywhere", default = Nil) + var front: Set[String] = config("front", default = Nil) + + var errorRate: Option[Double] = config("error_rate") + var noIndels: Boolean = config("no_indels", default = false) + var times: Option[Int] = config("times") + var overlap: Option[Int] = config("overlap") + var matchReadWildcards: Boolean = config("match_read_wildcards", default = false) + var noMatchAdapterWildcards: Boolean = config("no_match_adapter_wildcards", default = false) // specific for 1.9 + + /** Options for filtering of processed reads */ + var discard: Boolean = config("discard", default = false) + var trimmedOnly: Boolean = config("trimmed_only", default = false) + var minimumLength: Int = config("minimum_length", 1) + var maximumLength: Option[Int] = config("maximum_length") + var noTrim: Boolean = config("no_trim", default = false) + var maxN: Option[Int] = config("max_n") // specific for 1.9 + var maskAdapter: Boolean = config("mask_adapter", default = false) + + /** Options that influence what gets output to where */ + var quiet: Boolean = config("quiet", default = false) + // var output: File // see up @Output + var infoFile: Option[File] = config("info_file") + var restFile: Option[File] = config("rest_file") + var wildcardFile: Option[File] = config("wildcard_file") + var tooShortOutput: Option[File] = config("too_short_output") + var tooLongOutput: Option[File] = config("too_long_output") + var untrimmedOutput: Option[File] = config("untrimmed_output") + + /** Additional read modifications */ + var cut: Option[Int] = config("cut") + var qualityCutoff: Option[String] = config("quality_cutoff") + var qualityBase: Option[Int] = config("quality_base") + var trimN: Boolean = config("trim_n", default = false) + var prefix: Option[String] = config("prefix") + var suffix: Option[String] = config("suffix") + var stripSuffix: Set[String] = config("strip_suffix") + var lengthTag: Option[String] = config("length_tag") + + /** Colorspace options */ + var colorspace: Boolean = config("colorspace", default = false) + var doubleEncode: Boolean = config("double_encode", default = false) + var trimPrimer: Boolean = config("trim_primer", default = false) + var stripF3: Boolean = config("strip_f3", default = false) + var maq: Boolean = config("maq", default = false) + var bwa: Boolean = config("bwa", default = false, freeVar = false) + var noZeroCap: Boolean = config("no_zero_cap", default = false) + var zeroCap: Boolean = config("zero_cap", default = false) + + /** Paired end options */ + var peAdapter: Set[String] = config("pe_adapter", default = Nil) + var peAdapterFront: Set[String] = config("pe_adapter_front", default = Nil) + var peAdapterBoth: Set[String] = config("pe_adapter_both", default = Nil) + var peCut: Boolean = config("pe_cut", default = false) + var pairedOutput: Option[File] = config("paired_output") + var 
interleaved: Boolean = config("interleaved", default = false) + var untrimmedPairedOutput: Option[File] = config("untrimmed_paired_output") /** return commandline to execute */ def cmdLine = required(executable) + - // options - repeat("-a", opt_adapter) + - repeat("-b", opt_anywhere) + - repeat("-g", opt_front) + - conditional(opt_discard, "--discard") + - optional("-m", opt_minimum_length) + - optional("-M", opt_maximum_length) + + // Options that influence how the adapters are found + repeat("-a", adapter) + + repeat("-b", anywhere) + + repeat("-g", front) + + optional("--error-rate", errorRate) + + conditional(noIndels, "--no-indels") + + optional("--times", times) + + optional("--overlap", overlap) + + conditional(matchReadWildcards, "--match-read-wildcards") + + conditional(noMatchAdapterWildcards, "--no-match-adapter-wildcards") + + // Options for filtering of processed reads + conditional(discard, "--discard") + + conditional(trimmedOnly, "--trimmed-only") + + optional("-m", minimumLength) + + optional("-M", maximumLength) + + conditional(noTrim, "--no-trim") + + optional("--max-n", maxN) + + conditional(maskAdapter, "--mask-adapter") + + conditional(quiet, "--quiet") + + optional("--info-file", infoFile) + + optional("--rest-file", restFile) + + optional("--wildcard-file", wildcardFile) + + optional("--too-short-output", tooShortOutput) + + optional("--too-long-output", tooLongOutput) + + optional("--untrimmed-output", untrimmedOutput) + + // Additional read modifications + optional("--cut", cut) + + optional("--quality-cutoff", qualityCutoff) + + conditional(trimN, "--trim-n") + + optional("--prefix", prefix) + + optional("--suffix", suffix) + + optional("--strip-suffix", stripSuffix) + + optional("--length-tag", lengthTag) + + // Colorspace options + conditional(colorspace, "--colorspace") + + conditional(doubleEncode, "--double-encode") + + conditional(trimPrimer, "--trim-primer") + + conditional(stripF3, "--strip-f3") + + conditional(maq, "--maq") + + conditional(bwa, "--bwa") + + conditional(noZeroCap, "--no-zero-cap") + + conditional(zeroCap, "--zero-cap") + + // Paired-end options + repeat("-A", peAdapter) + + repeat("-G", peAdapterFront) + + repeat("-B", peAdapterBoth) + + conditional(interleaved, "--interleaved") + + optional("--paired-output", pairedOutput) + + optional("--untrimmed-paired-output", untrimmedPairedOutput) + // input / output - required(fastq_input) + - (if (outputAsStsout) "" else required("--output", fastq_output) + - " > " + required(stats_output)) + required(fastqInput) + + (if (outputAsStsout) "" else required("--output", fastqOutput) + + " > " + required(statsOutput)) /** Output summary stats */ def summaryStats: Map[String, Any] = { @@ -79,7 +173,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su val stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0) val adapter_stats: mutable.Map[String, Int] = mutable.Map() - if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines()) { + if (statsOutput.exists) for (line <- Source.fromFile(statsOutput).getLines()) { line match { case trimR(m) => stats += ("trimmed" -> m.toInt) case tooShortR(m) => stats += ("tooshort" -> m.toInt) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala index c066b976f21808e982900f9e8a78fcd651cfe8f6..951d667dad32ab2db616c53f11347db0da6e99b9 100644 --- 
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala @@ -132,4 +132,13 @@ object Ln { ln.relative = relative ln } + + def linkBamFile(root: Configurable, input: File, output: File, index: Boolean = true, relative: Boolean = true): List[Ln] = { + val bamLn = Ln(root, input, output, relative) + bamLn :: (if (index) { + val inputIndex = new File(input.getAbsolutePath.stripSuffix(".bam") + ".bai") + val outputIndex = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai") + List(Ln(root, inputIndex, outputIndex, relative)) + } else Nil) + } } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala index 5f5edbfc4e38b0dccacd017650c818b8aa1c56fb..916e2b4c9d8465426fe693512ae739d2af393979 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Star.scala @@ -62,7 +62,7 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction with Refere var genomeDir: File = null var runmode: String = _ - var sjdbOverhang: Int = _ + var sjdbOverhang: Option[Int] = None var outFileNamePrefix: String = _ var runThreadN: Option[Int] = config("runThreadN") @@ -73,24 +73,24 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction with Refere override def beforeGraph() { super.beforeGraph() if (reference == null) reference = referenceFasta() - genomeDir = config("genomeDir", new File(reference.getAbsoluteFile.getParent, "star")) if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "." 
val prefix = if (outFileNamePrefix != null) outputDir + File.separator + outFileNamePrefix else outputDir + File.separator if (runmode == null) { outputSam = new File(prefix + "Aligned.out.sam") outputTab = new File(prefix + "SJ.out.tab") + genomeDir = config("genomeDir", new File(reference.getAbsoluteFile.getParent, "star")) } else if (runmode == "genomeGenerate") { genomeDir = outputDir outputGenome = new File(prefix + "Genome") outputSA = new File(prefix + "SA") outputSAindex = new File(prefix + "SAindex") - sjdbOverhang = config("sjdboverhang", 75) + sjdbOverhang = config("sjdboverhang") } } /** Returns command to execute */ def cmdLine = { - var cmd: String = required("cd", outputDir) + "&&" + required(executable) + var cmd: String = required("cd", outputDir) + " && " + required(executable) if (runmode != null && runmode == "genomeGenerate") { // Create index cmd += required("--runMode", runmode) + required("--genomeFastaFiles", reference) @@ -100,8 +100,8 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction with Refere cmd += required("--genomeDir", genomeDir) + optional("--sjdbFileChrStartEnd", sjdbFileChrStartEnd) + optional("--runThreadN", threads) + - optional("--outFileNamePrefix", outFileNamePrefix) - if (sjdbOverhang > 0) cmd += optional("--sjdbOverhang", sjdbOverhang) + optional("--outFileNamePrefix", outFileNamePrefix) + + optional("--sjdbOverhang", sjdbOverhang) cmd } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala index a94561d8bc0c2aec368148b20e159e08e2e56ef0..eb39fa864a481f094636474a7ec61396f5c6dcf8 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala @@ -66,6 +66,7 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi private val validFormats: Set[String] = Set("gff", "bed", "sam", "vcf", "psltbl") override def beforeGraph(): Unit = { + super.beforeGraph() p match { case Some(fmt) => require(validFormats.contains(fmt), "-p flag must be one of " + validFormats.mkString(", ")) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/TarExtract.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/TarExtract.scala new file mode 100644 index 0000000000000000000000000000000000000000..3a0d36c0ff7615b89f632fa51218dd1b215e09af --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/TarExtract.scala @@ -0,0 +1,33 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } + +/** + * Created by pjvan_thof on 8/11/15. + */ +class TarExtract(val root: Configurable) extends BiopetCommandLineFunction with Version { + @Input(required = true) + var inputTar: File = _ + + @Argument(required = true) + var outputDir: File = _ + + executable = config("exe", default = "tar", freeVar = false) + def versionCommand = executable + " --version" + def versionRegex = """tar \(GNU tar\) (.*)""".r + + override def beforeGraph: Unit = { + super.beforeGraph + jobLocalDir = outputDir + jobOutputFile = new File(outputDir, "." 
+ inputTar.getName + ".tar.out") + } + + def cmdLine: String = required(executable) + + required("-x") + + required("-f", inputTar) + + required("--directory", outputDir) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala index 2eeb98a412f8885e6cdb2c9c0449feda267f4a70..38efdf63a569935aac5a5385460c146e60613eca 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Zcat.scala @@ -29,6 +29,8 @@ class Zcat(val root: Configurable) extends BiopetCommandLineFunction with Versio @Output(doc = "Unzipped file", required = true) var output: File = _ + var appending = false + executable = config("exe", default = "zcat") def versionRegex = """zcat \(gzip\) (.*)""".r @@ -37,7 +39,7 @@ class Zcat(val root: Configurable) extends BiopetCommandLineFunction with Versio /** Returns command to execute */ def cmdLine = required(executable) + (if (inputAsStdin) "" else repeat(input)) + - (if (outputAsStsout) "" else " > " + required(output)) + (if (outputAsStsout) "" else (if (appending) " >> " else " > ") + required(output)) } object Zcat { diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala new file mode 100644 index 0000000000000000000000000000000000000000..4bb87d332e15abc0e93f71f43e2329eb574e95ca --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala @@ -0,0 +1,184 @@ +package nl.lumc.sasc.biopet.extensions.bcftools + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } + +/** + * Created by ahbbollen on 12-10-15. 
+ */ +class BcftoolsView(val root: Configurable) extends Bcftools { + + @Input(doc = "Input VCF file") + var input: File = _ + + @Output(doc = "Output file") + var output: File = _ + + @Argument(doc = "drop individual genotypes", required = false) + var dropGenotype: Boolean = config("drop_genotype", default = false) + + @Argument(doc = "header only", required = false) + var headerOnly: Boolean = config("header_only", false) + + @Argument(doc = "Compression level", required = false) + var compressionLevel: Int = config("compression_level", default = 9) + + @Argument(doc = "output type", required = false) + var outputType: String = "z" + + @Argument(doc = "regions", required = false) + var regions: Option[String] = config("r") + + @Argument(doc = "region file", required = false) + var regionFile: Option[File] = config("R") + + @Argument(doc = "targets", required = false) + var targets: Option[String] = config("t") + + @Argument(doc = "targets file", required = false) + var targetFile: Option[File] = config("T") + + @Argument(doc = "trim alt alleles", required = false) + var trimAltAlleles: Boolean = config("trim_alt_allele", default = false) + + @Argument(doc = "no update", required = false) + var noUpdate: Boolean = config("no_update", default = false) + + @Argument(doc = "samples", required = false) + var samples: List[String] = config("s", default = Nil) + + @Argument(doc = "samples file", required = false) + var sampleFile: Option[File] = config("S") + + @Argument(doc = "minimum allele count", required = false) + var minAC: Option[Int] = config("c") + + @Argument(doc = "max allele count", required = false) + var maxAC: Option[Int] = config("C") + + @Argument(doc = "exclude (expression)", required = false) + var exclude: Option[String] = config("e") + + @Argument(doc = "apply filters", required = false) + var applyFilters: List[String] = config("F", default = Nil) + + @Argument(doc = "genotype", required = false) + var genotype: Option[String] = config("g") + + @Argument(doc = "include (expression)", required = false) + var include: Option[String] = config("i") + + @Argument(doc = "Known (ID field is not .) only", required = false) + var known: Boolean = config("k", default = false) + + @Argument(doc = "min alleles", required = false) + var minAlleles: Option[Int] = config("m") + + @Argument(doc = "max alleles", required = false) + var maxAlleles: Option[Int] = config("M") + + @Argument(doc = "novel (ID field is .) 
only", required = false) + var novel: Boolean = config("n", false) + + @Argument(doc = "phased only", required = false) + var phased: Boolean = config("p", false) + + @Argument(doc = "exclude phased (only)", required = false) + var excludePhased: Boolean = config("P", false) + + @Argument(doc = "min allele frequency", required = false) + var minAF: Option[Int] = config("q") + + @Argument(doc = "max allele frequency", required = false) + var maxAF: Option[Int] = config("Q") + + @Argument(doc = "uncalled only", required = false) + var uncalled: Boolean = config("u", default = false) + + @Argument(doc = "exclude uncalled (only)", required = false) + var excludeUncalled: Boolean = config("U", default = false) + + @Argument(doc = "types", required = false) + var types: Option[String] = config("v") + + @Argument(doc = "exclude types", required = false) + var excludeTypes: Option[String] = config("V") + + @Argument(doc = "private (requires samples)", required = false) + var onlyPrivate: Boolean = config("x", default = false) + + @Argument(doc = "Exclude privates", required = false) + var excludePrivate: Boolean = config("X", default = false) + + override def beforeGraph() = { + super.beforeGraph() + + require((compressionLevel <= 9) && (compressionLevel >= 0)) + require( + (outputType.length == 1) && + (outputType == "z" || outputType == "b" || outputType == "u" || outputType == "v") + ) + } + + def baseCmd = { + executable + + required("view") + + conditional(dropGenotype, "-G") + + conditional(headerOnly, "-h") + + required("-l", compressionLevel) + + required("-O", outputType) + + optional("-r", regions) + + optional("-R", regionFile) + + optional("-t", targets) + + optional("-T", targetFile) + + conditional(trimAltAlleles, "-a") + + conditional(noUpdate, "-I") + + repeat("-s", samples) + + optional("-S", sampleFile) + + optional("-c", minAC) + + optional("-C", maxAC) + + optional("-e", exclude) + + optional("-f", applyFilters) + + optional("-g", genotype) + + optional("-i", include) + + conditional(known, "-k") + + optional("-m", minAlleles) + + optional("-M", maxAlleles) + + conditional(novel, "-n") + + conditional(phased, "-p") + + conditional(excludePhased, "-P") + + optional("-q", minAF) + + optional("-Q", maxAF) + + conditional(uncalled, "-u") + + conditional(excludeUncalled, "-U") + + optional("-v", types) + + optional("-V", excludeTypes) + + conditional(onlyPrivate, "-x") + + conditional(excludePrivate, "-X") + } + + def cmdPipeInput = { + baseCmd + "-" + } + + def cmdPipe = { + baseCmd + required(input) + } + + def cmdLine = { + baseCmd + required("-o", output) + required(input) + } + + /** + * Convert cmdLine into line without quotes and double spaces + * primarily for testing + * @return + */ + final def cmd = { + val a = cmdLine + a.replace("'", "").replace(" ", " ").trim + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala new file mode 100644 index 0000000000000000000000000000000000000000..f72411a6ab7b83c7661d4f36ec38d12bca3136c1 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala @@ -0,0 +1,28 @@ +package nl.lumc.sasc.biopet.extensions.bedtools + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input } + +/** + * Created by ahbbollen on 5-1-16. 
+ */ +class BedtoolsMerge(val root: Configurable) extends Bedtools { + + @Input(doc = "Input bed file") + var input: File = _ + + @Argument(doc = "Distance") + var dist: Option[Int] = config("dist") // default of tool is 1 + + @Output(doc = "Output bed file") + var output: File = _ + + def cmdLine = { + required(executable) + required("merge") + + required("-i", input) + optional("-d", dist) + + " > " + required(output) + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie.scala similarity index 98% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie.scala index 2733ba975b7ea05ef8ade6d534ea48ec711c0c72..429c65b985980f2874dc53ff4a97c546d4c29f13 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bowtie.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie.scala @@ -13,7 +13,7 @@ * license; For commercial users or users who do not want to follow the AGPL * license, please contact us to obtain a separate license. */ -package nl.lumc.sasc.biopet.extensions +package nl.lumc.sasc.biopet.extensions.bowtie import java.io.File diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala new file mode 100644 index 0000000000000000000000000000000000000000..793fd7d159a64c5f432afd575d5a04b7d4fa7c09 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala @@ -0,0 +1,235 @@ +package nl.lumc.sasc.biopet.extensions.bowtie + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference, Version } +import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Extension for bowtie 2 + * + * Based on version 2.2.6 + */ +class Bowtie2(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { + @Input(doc = "Fastq file R1", shortName = "R1") + var R1: File = null + + @Input(doc = "Fastq file R2", shortName = "R2", required = false) + var R2: Option[File] = None + + @Output(doc = "Output file SAM", shortName = "output", required = true) + var output: File = null + + executable = config("exe", default = "bowtie2", freeVar = false) + def versionRegex = """.*[Vv]ersion:? (.*)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " --version" + + override def defaultCoreMemory = 4.0 + override def defaultThreads = 4 + + /* Required */ + var bowtieIndex: String = config("bowtie_index") + + /* Input options */ + var q: Boolean = config("q", default = false) + var qseq: Boolean = config("qseq", default = false) + var f: Boolean = config("f", default = false) + var r: Boolean = config("r", default = false) + var c: Boolean = config("c", default = false) + var skip: Option[Int] = config("skip") + var upto: Option[Int] = config("upto") + var trim5: Option[Int] = config("trim5") + var trim3: Option[Int] = config("trim3") + var phred33: Boolean = config("phred33", default = false) + var phred64: Boolean = config("phred64", default = false) + var int_quals: Boolean = config("int_quals", default = false) + + /* Alignment options */ + var N: Option[Int] = config("N") + var L: Option[Int] = config("L") + var i: Option[String] = config("i") + var n_ceil: Option[String] = config("n_ceil") + var dpad: Option[Int] = config("dpad") + var gbar: Option[Int] = config("gbar") + var ignore_quals: Boolean = config("ignore_quals", default = false) + var nofw: Boolean = config("nofw", default = false) + var norc: Boolean = config("norc", default = false) + var no_1mm_upfront: Boolean = config("no_1mm_upfront", default = false) + var end_to_end: Boolean = config("end_to_end", default = false) + var local: Boolean = config("local", default = false) + + /* Scoring */ + var ma: Option[Int] = config("ma") + var mp: Option[Int] = config("mp") + var np: Option[Int] = config("np") + var rdg: Option[String] = config("rdg") + var rfg: Option[String] = config("rfg") + var score_min: Option[String] = config("score_min") + + /* Reporting */ + var k: Option[Int] = config("k") + var all: Option[Int] = config("all") + + /* Effort */ + var D: Option[Int] = config("D") + var R: Option[Int] = config("R") + + /* Paired-end */ + var minins: Option[Int] = config("minins") + var maxins: Option[Int] = config("maxins") + var fr: Boolean = config("fr", default = false) + var rf: Boolean = config("rf", default = false) + var ff: Boolean = config("ff", default = false) + var no_mixed: Boolean = config("no_mixed", default = false) + var no_discordant: Boolean = config("no_discordant", default = false) + var no_dovetail: Boolean = config("no_dovetail", default = false) + var no_contain: Boolean = config("no_contain", default = false) + var no_overlap: Boolean = config("no_overlap", default = false) + + /* Output */ + var time: Boolean = config("time", default = false) + + var un: Option[String] = config("un") + var al: Option[String] = config("al") + var un_conc: Option[String] = config("un_conc") + var al_conc: Option[String] = config("al_conc") + + var un_gz: Option[String] = config("un_gz") + var al_gz: Option[String] = config("al_gz") + var un_conc_gz: Option[String] = config("un_conc_gz") + var al_conc_gz: Option[String] = config("al_conc_gz") + + var un_bz2: Option[String] = config("un_bz2") + var al_bz2: Option[String] = config("al_bz2") + var un_conc_bz2: Option[String] = config("un_conc_bz2") + var al_conc_bz2: Option[String] = config("al_conc_bz2") + + var quiet: Boolean = config("quiet", default = false) + var met_file: Option[String] = config("met_file") + var met_stderr: Boolean = config("met_stderr", default = false) + var met: Option[Int] = config("met") + + var no_unal: Boolean = config("no_unal", default = false) + var no_head: Boolean = config("no_head",
default = false) + var no_sq: Boolean = config("no_sq", default = false) + + var rg_id: Option[String] = config("rg_id") + var rg: List[String] = config("rg", default = Nil) + + var omit_sec_seq: Boolean = config("omit_sec_seq", default = false) + + /* Performance */ + var reorder: Boolean = config("reorder", default = false) + var mm: Boolean = config("mm", default = false) + + /* Other */ + var qc_filter: Boolean = config("qc_filter", default = false) + var seed: Option[Int] = config("seed") + var non_deterministic: Boolean = config("non_deterministic", default = false) + + override def beforeGraph() { + super.beforeGraph() + val indexDir = new File(bowtieIndex).getParentFile + val basename = bowtieIndex.stripPrefix(indexDir.getPath + File.separator) + if (indexDir.exists()) { + if (!indexDir.list().toList.filter(_.startsWith(basename)).exists(_.endsWith(".bt2"))) + Logging.addError(s"No index files found for bowtie2 in: $indexDir with basename: $basename") + } + } + + /** return commandline to execute */ + def cmdLine = required(executable) + + conditional(q, "-q") + + conditional(qseq, "--qseq") + + conditional(f, "-f") + + conditional(r, "-r") + + conditional(c, "-c") + + optional("--skip", skip) + + optional("--upto", upto) + + optional("--trim3", trim3) + + optional("--trim5", trim5) + + conditional(phred33, "--phred33") + + conditional(phred64, "--phred64") + + conditional(int_quals, "--int-quals") + + /* Alignment options */ + optional("-N", N) + + optional("-L", L) + + optional("-i", i) + + optional("--n-ceil", n_ceil) + + optional("--dpad", dpad) + + optional("--gbar", gbar) + + conditional(ignore_quals, "--ignore-quals") + + conditional(nofw, "--nofw") + + conditional(norc, "--norc") + + conditional(no_1mm_upfront, "--no-1mm-upfront") + + conditional(end_to_end, "--end-to-end") + + conditional(local, "--local") + + /* Scoring */ + optional("--ma", ma) + + optional("--mp", mp) + + optional("--np", np) + + optional("--rdg", rdg) + + optional("--rfg", rfg) + + optional("--score-min", score_min) + + /* Reporting */ + optional("-k", k) + + optional("--all", all) + + /* Effort */ + optional("-D", D) + + optional("-R", R) + + /* Paired End */ + optional("--minins", minins) + + optional("--maxins", maxins) + + conditional(fr, "--fr") + + conditional(rf, "--rf") + + conditional(ff, "--ff") + + conditional(no_mixed, "--no-mixed") + + conditional(no_discordant, "--no-discordant") + + conditional(no_dovetail, "--no-dovetail") + + conditional(no_contain, "--no-contain") + + conditional(no_overlap, "--no-overlap") + + /* Output */ + conditional(time, "--time") + + optional("--un", un) + + optional("--al", al) + + optional("--un-conc", un_conc) + + optional("--al-conc", al_conc) + + optional("--un-gz", un_gz) + + optional("--al-gz", al_gz) + + optional("--un-conc-gz", un_conc_gz) + + optional("--al-conc-gz", al_conc_gz) + + optional("--un-bz2", un_bz2) + + optional("--al-bz2", al_bz2) + + optional("--un-conc-bz2", un_conc_bz2) + + optional("--al-conc-bz2", al_conc_bz2) + + conditional(quiet, "--quiet") + + optional("--met-file", met_file) + + conditional(met_stderr, "--met-stderr") + + optional("--met", met) + + conditional(no_unal, "--no-unal") + + conditional(no_head, "--no-head") + + conditional(no_sq, "--no-sq") + + optional("--rg-id", rg_id) + + repeat("--rg", rg) + + conditional(omit_sec_seq, "--omit-sec-seq") + + /* Performance */ + optional("--threads", threads) + + conditional(reorder, "--reorder") + + conditional(mm, "--mm") + + /* Other */ + conditional(qc_filter, "--qc-filter") + 
+ optional("--seed", seed) + + conditional(non_deterministic, "--non-deterministic") + + /* Required */ + required("-x", bowtieIndex) + + (R2 match { + case Some(r2) => + required("-1", R1) + + optional("-2", r2) + case _ => required("-U", R1) + }) + + (if (outputAsStsout) "" else required("-S", output)) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala new file mode 100644 index 0000000000000000000000000000000000000000..0e8ce7b9faeaf3f9c4ed0ec4a6342ed3ebb82f93 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2Build.scala @@ -0,0 +1,34 @@ +package nl.lumc.sasc.biopet.extensions.bowtie + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } + +/** + * Created by pjvan_thof on 8/15/15. + */ +class Bowtie2Build(val root: Configurable) extends BiopetCommandLineFunction with Version { + @Input(required = true) + var reference: File = _ + + @Argument(required = true) + var baseName: String = _ + + executable = config("exe", default = "bowtie2-build", freeVar = false) + def versionRegex = """.*[Vv]ersion:? (\d*\.\d*\.\d*)""".r + def versionCommand = executable + " --version" + + override def defaultCoreMemory = 15.0 + + override def beforeGraph: Unit = { + outputFiles ::= new File(reference.getParentFile, baseName + ".1.bt2") + outputFiles ::= new File(reference.getParentFile, baseName + ".2.bt2") + } + + def cmdLine = required("cd", reference.getParentFile) + " && " + + required(executable) + + required(reference) + + required(baseName) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala new file mode 100644 index 0000000000000000000000000000000000000000..ed4c8b830aafb07a1a0ebfb83d88f6331a061b1f --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/BowtieBuild.scala @@ -0,0 +1,34 @@ +package nl.lumc.sasc.biopet.extensions.bowtie + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } + +/** + * Created by pjvan_thof on 8/15/15. + */ +class BowtieBuild(val root: Configurable) extends BiopetCommandLineFunction with Version { + @Input(required = true) + var reference: File = _ + + @Argument(required = true) + var baseName: String = _ + + executable = config("exe", default = "bowtie-build", freeVar = false) + def versionRegex = """.*[Vv]ersion:? 
(\d*\.\d*\.\d*)""".r + def versionCommand = executable + " --version" + + override def defaultCoreMemory = 15.0 + + override def beforeGraph: Unit = { + outputFiles ::= new File(reference.getParentFile, baseName + ".1.ebwt") + outputFiles ::= new File(reference.getParentFile, baseName + ".2.ebwt") + } + + def cmdLine = required("cd", reference.getParentFile) + " && " + + required(executable) + + required(reference) + + required(baseName) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaIndex.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaIndex.scala new file mode 100644 index 0000000000000000000000000000000000000000..d1856275a4b917bc022aafee5608defcf864eb24 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bwa/BwaIndex.scala @@ -0,0 +1,59 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.bwa + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Extension for bwa aln + * + * Based on version 0.7.12-r1039 + * + * Created by pjvan_thof on 1/16/15. + */ +class BwaIndex(val root: Configurable) extends Bwa { + @Input(doc = "Fastq file", required = true) + var reference: File = _ + + @Output(doc = "Index files for bwa", required = false) + private var output: List[File] = Nil + + var a: Option[String] = config("a", freeVar = false) + var p: Option[String] = config("p", freeVar = false) + var b: Option[Int] = config("e", freeVar = false) + var _6: Boolean = config("6", default = false, freeVar = false) + + override def defaultCoreMemory = 35.0 + + override def beforeGraph() { + super.beforeGraph() + List(".sa", ".pac") + .foreach(ext => output ::= new File(reference.getAbsolutePath + ext)) + output = output.distinct + } + + /** Returns command to execute */ + def cmdLine = required(executable) + + required("index") + + optional("-a", a) + + optional("-p", p) + + optional("-b", b) + + conditional(_6, "-6") + + required(reference) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/GmapBuild.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/GmapBuild.scala new file mode 100644 index 0000000000000000000000000000000000000000..45d27a11e674e33f4b8fea4df164709a35459973 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/GmapBuild.scala @@ -0,0 +1,60 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. 
But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.gmap + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Input + +/** + * Wrapper for the gsnap command line tool + * Written based on gsnap version 2014-05-15 + */ +class GmapBuild(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { + + /** default executable */ + executable = config("exe", default = "gmap_build", freeVar = false) + + /** input file */ + @Input(doc = "Input fasta files", required = true) //var input: List[File] = _ + var fastaFiles: List[File] = Nil + + /** genome directory */ + var dir: File = _ + + /** genome database */ + var db: String = _ + + override def defaultCoreMemory = 25.0 + + def versionRegex = """.* version (.*)""".r + def versionCommand = executable + override def versionExitcode = List(0, 1, 255) + + override def beforeGraph: Unit = { + super.beforeGraph + jobOutputFile = new File(dir, ".log.out") + } + + def cmdLine = { + required(executable) + + required("--dir", dir) + + optional("--db", db) + + repeat(fastaFiles) + } +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/Gsnap.scala similarity index 99% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/Gsnap.scala index c640da9bc3b82cf1495abd4d8af69b6d1c8bbed5..7b92be3b16946a6ae74dbe1537031a0ac864e534 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Gsnap.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gmap/Gsnap.scala @@ -13,7 +13,7 @@ * license; For commercial users or users who do not want to follow the AGPL * license, please contact us to obtain a separate license. 
*/ -package nl.lumc.sasc.biopet.extensions +package nl.lumc.sasc.biopet.extensions.gmap import java.io.File @@ -334,7 +334,7 @@ class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Refer override def beforeGraph(): Unit = { super.beforeGraph() if ((!gunzip && !bunzip2) && input.forall(_.getName.endsWith(".gz"))) { - logger.info("Fastq with .gz extension found, enabled --gunzip option") + logger.debug("Fastq with .gz extension found, enabled --gunzip option") gunzip = true } } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala new file mode 100644 index 0000000000000000000000000000000000000000..e2566088d254a9e467d1a00a90e477e431c6d534 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala @@ -0,0 +1,83 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.{ PrintWriter, File } + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction +import org.broadinstitute.gatk.utils.commandline.{ Output, Argument } + +/** + * Created by ahbbollen on 23-9-15. + * This is python, but not accessed like a script; i.e. called by simply + * manwe [subcommand] + */ +abstract class Manwe extends BiopetCommandLineFunction { + executable = config("exe", default = "manwe", submodule = "manwe") + + var manweConfig: File = createManweConfig(None) + + @Output(doc = "the output file") + var output: File = _ + + var manweHelp: Boolean = false + + def subCommand: String + + final def cmdLine = { + manweConfig = createManweConfig(Option(output).map(_.getParentFile)) + required(executable) + + subCommand + + required("-c", manweConfig) + + conditional(manweHelp, "-h") + + " > " + + required(output) + + } + + /** + * Convert cmdLine into line without quotes and double spaces + * primarily for testing + * @return + */ + final def cmd = { + val a = cmdLine + a.replace("'", "").replace(" ", " ").trim + } + + /** + * Create Manwe config from biopet config + * @return Manwe config file + */ + def createManweConfig(directory: Option[File]): File = { + val url: String = config("varda_root") + val token: String = config("varda_token") + val sslSettings: Option[String] = config("varda_verify_certificate") + val collectionCacheSize: Option[Int] = config("varda_cache_size", default = 20) + val dataBufferSize: Option[Int] = config("varda_buffer_size", default = 1024 * 1024) + val taskPollWait: Option[Int] = config("varda_task_poll_wait", default = 2) + + val settingsMap: Map[String, Any] = Map( + "API_ROOT" -> s"'$url'", + "TOKEN" -> s"'$token'", + "VERIFY_CERTIFICATE" -> (sslSettings match { + case Some("true") => "True" + case Some("false") => "False" + case Some(x) => s"'$x'" + case _ => "True" + }), + "COLLECTION_CACHE_SIZE" -> collectionCacheSize.getOrElse(20), + "DATA_BUFFER_SIZE" -> dataBufferSize.getOrElse(1048576), + "TASK_POLL_WAIT" -> taskPollWait.getOrElse(2) + ) + + val file = directory match { + case Some(dir) => File.createTempFile("manwe_config", ".py", dir) + case None => File.createTempFile("manwe_config", ".py") + } + + file.deleteOnExit() + val writer = new PrintWriter(file) + settingsMap.foreach { case (key, value) => writer.println(s"$key = $value") } + writer.close() + file + } +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala 
b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala new file mode 100644 index 0000000000000000000000000000000000000000..f3c3fbb1e22a019a18b7eb8caa6d262e2f3347f8 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala @@ -0,0 +1,32 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweAnnotateBed(val root: Configurable) extends Manwe { + + @Input(doc = "the bed to annotate") + var bed: File = _ + + @Argument(doc = "flag if data has already been uploaded") + var alreadyUploaded: Boolean = false + + @Argument(doc = "Flag whether to wait for annotation to complete on the server") + var waitToComplete: Boolean = false + + @Argument(doc = "annotation queries", required = false) + var queries: List[String] = Nil + + def subCommand = { + required("annotate-bed") + required(bed) + + conditional(alreadyUploaded, "-u") + + repeat("-q", queries) + + conditional(waitToComplete, "--wait") + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala new file mode 100644 index 0000000000000000000000000000000000000000..64a849a536d4695facbfd290379426fd3d646287 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala @@ -0,0 +1,32 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Input } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweAnnotateVcf(val root: Configurable) extends Manwe { + + @Input(doc = "the vcf to annotate") + var vcf: File = _ + + @Argument(doc = "flag if data has already been uploaded") + var alreadyUploaded: Boolean = false + + @Argument(doc = "flag whether to wait for annotation to complete") + var waitToComplete: Boolean = false + + @Argument(doc = "annotation queries", required = false) + var queries: List[String] = Nil + + def subCommand = { + required("annotate-vcf") + required(vcf) + + conditional(alreadyUploaded, "-u") + + repeat("-q", queries) + + conditional(waitToComplete, "--wait") + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala new file mode 100644 index 0000000000000000000000000000000000000000..0980b4c23d4450c95b17baa8c1d2f52bfb6a63d5 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala @@ -0,0 +1,27 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Argument + +/** + * Created by ahbbollen on 24-9-15. 
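+ *
+ * A minimal usage sketch; the data-source URI, query and output path below are
+ * placeholder values mirroring the ones used in ManweTest, not part of this change:
+ * {{{
+ * val annotate = new ManweDataSourcesAnnotate(this)
+ * annotate.uri = Some("/uri/1") // placeholder URI
+ * annotate.queries = List("/uri/1&&/uri/2")
+ * annotate.waitToComplete = true
+ * annotate.output = new File("annotate.out")
+ * add(annotate)
+ * }}}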
+ */ +class ManweDataSourcesAnnotate(val root: Configurable) extends Manwe { + + @Argument(doc = "uri to data source to annotate") + var uri: Option[String] = _ + + @Argument(doc = "list of queries", required = false) + var queries: List[String] = Nil + + @Argument(doc = "Flag whether to wait for annotation to complete on server") + var waitToComplete: Boolean = false + + def subCommand = { + required("data-sources") + required("annotate") + + required(uri) + + repeat("-q", queries) + + conditional(waitToComplete, "--wait") + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala new file mode 100644 index 0000000000000000000000000000000000000000..d55dd6a4f8c8c181bd565c60d085b938a08fe4a0 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala @@ -0,0 +1,19 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Argument + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweDataSourcesDownload(val root: Configurable) extends Manwe { + + @Argument(doc = "uri to data source to download") + var uri: String = _ + + def subCommand = { + required("data-sources") + + required("download") + + required(uri) + } +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala new file mode 100644 index 0000000000000000000000000000000000000000..dbcb7b46913e7bb66ba41d74bc141e3ed861a0b0 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala @@ -0,0 +1,23 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweDataSourcesList(val root: Configurable) extends Manwe { + + @Argument(doc = "User uri to filter by") + var user: Option[String] = _ + + def subCommand = { + required("data-sources") + + required("list") + + optional("-u", user) + } + +} + diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala new file mode 100644 index 0000000000000000000000000000000000000000..662bb51b79e292a352d524ca2437dd953737d894 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala @@ -0,0 +1,22 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. 
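+ *
+ * A minimal usage sketch (URI and output path are placeholders); the server
+ * response ends up in `output` via the redirect that Manwe.cmdLine appends:
+ * {{{
+ * val show = new ManweDataSourcesShow(this)
+ * show.uri = Some("/uri/1") // placeholder URI
+ * show.output = new File("data_source.out")
+ * add(show)
+ * }}}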
+ */ +class ManweDataSourcesShow(val root: Configurable) extends Manwe { + + @Argument(doc = "uri of data source") + var uri: Option[String] = _ + + def subCommand = { + required("data-sources") + + required("show") + + required(uri) + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala new file mode 100644 index 0000000000000000000000000000000000000000..3815d64a258dd52b354bb2be2be1610d6eb2d4cb --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala @@ -0,0 +1,22 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweSamplesActivate(val root: Configurable) extends Manwe { + + @Argument(doc = "uri to sample to activate") + var uri: String = _ + + def subCommand = { + required("samples") + + required("activate") + + required(uri) + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala new file mode 100644 index 0000000000000000000000000000000000000000..fc5cababd9c141208b2a8ec945895babaac0dc4a --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala @@ -0,0 +1,30 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweSamplesAdd(val root: Configurable) extends Manwe { + + @Argument(doc = "the sample name") + var name: Option[String] = _ + + @Argument(doc = "the sample groups [uris]", required = false) + var group: List[String] = Nil + + @Argument(doc = "pool size") + var poolSize: Option[Int] = _ + + def subCommand = { + required("samples") + + required("add") + + required(name) + + optional("-s", poolSize) + + repeat("-g", group) + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala new file mode 100644 index 0000000000000000000000000000000000000000..04092cc14b1cd39fd8d8a006c77dfadfa9163627 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala @@ -0,0 +1,26 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. 
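+ *
+ * A minimal usage sketch (sample URI and queries are placeholders); each entry
+ * in `queries` becomes its own `-q` argument on the command line:
+ * {{{
+ * val annotate = new ManweSamplesAnnotateVariations(this)
+ * annotate.uri = Some("/uri/1") // placeholder URI
+ * annotate.queries = List("/uri/1&&/uri/2", "/uri/3")
+ * annotate.output = new File("annotate.out")
+ * add(annotate)
+ * }}}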
+ */ +class ManweSamplesAnnotateVariations(val root: Configurable) extends Manwe { + + @Argument(doc = "uri to sample to annotate") + var uri: Option[String] = _ + + @Argument(doc = "Annotation queries", required = false) + var queries: List[String] = Nil + + def subCommand = { + required("samples") + + required("annotate-variations") + + required(uri) + + repeat("-q", queries) + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala new file mode 100644 index 0000000000000000000000000000000000000000..e683f2c2a7d8e31481becf2d9ed4b6567efc744f --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala @@ -0,0 +1,62 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweSamplesImport(val root: Configurable) extends Manwe { + + /** + * Creates sample and imports vcf and bed files immediately + */ + + @Argument(doc = "name of sample", required = true) + var name: Option[String] = _ + + @Argument(doc = "Group uris", required = false) + var group: List[String] = Nil + + @Input(doc = "Vcf files to upload", required = false) + var vcfs: List[File] = Nil + + @Input(doc = "BED files to upload", required = false) + var beds: List[File] = Nil + + @Argument(doc = "flag for data already uploaded", required = false) + var alreadyUploaded: Boolean = false + + @Argument(doc = "flag to mark sample as public", required = false) + var public: Boolean = false + + @Argument(doc = "flag if sample has no coverage profile", required = false) + var noCoverage: Boolean = false + + @Argument(doc = "Prefer genotypes derived from likelihood (PL) fields in stead of GT field", required = false) + var preferLikelihood: Boolean = false + + @Argument(doc = "Pool size", required = false) + var poolSize: Option[Int] = _ + + @Argument(doc = " Flag whether to wait for import to complete on server", required = false) + var waitToComplete: Boolean = false + + def subCommand = { + required("samples") + + required("import") + + required(name) + + repeat("-g", group) + + repeat("--vcf", vcfs) + + repeat("--bed", beds) + + optional("-s", poolSize) + + conditional(alreadyUploaded, "-u") + + conditional(public, "-p") + + conditional(preferLikelihood, "-l") + + conditional(noCoverage, "--no-coverage-profile") + + conditional(waitToComplete, "--wait") + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala new file mode 100644 index 0000000000000000000000000000000000000000..9eb3f12d0974d95af940b0e34f3b930fd06ba0a1 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala @@ -0,0 +1,38 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. 
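+ *
+ * A minimal usage sketch for importing a BED file into an existing sample
+ * (sample URI and file names are placeholders, not part of this change):
+ * {{{
+ * val importBed = new ManweSamplesImportBed(this)
+ * importBed.uri = Some("/uri/1") // placeholder sample URI
+ * importBed.bed = new File("sample.bed")
+ * importBed.waitToComplete = true
+ * importBed.output = new File("import.out")
+ * add(importBed)
+ * }}}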
+ */ +class ManweSamplesImportBed(val root: Configurable) extends Manwe { + + /** + * Import bed for existing sample + */ + + @Argument(doc = "uri of sample to upload to") + var uri: Option[String] = _ + + @Argument(doc = "path to VCF file to upload") + var bed: File = _ + + @Argument(doc = "flag if data is already uploaded?") // TODO: What is the use of this flag even? We're specifically uploading with this command + var alreadyUploaded: Boolean = false + + @Argument(doc = " Flag whether to wait for import to complete on server") + var waitToComplete: Boolean = false + + def subCommand = { + required("samples") + + required("import-bed") + + required(uri) + + required(bed) + + conditional(alreadyUploaded, "-u") + + conditional(waitToComplete, "--wait") + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala new file mode 100644 index 0000000000000000000000000000000000000000..61f6a5f223b4af57d6066ae8f80cb0164cb8cd53 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala @@ -0,0 +1,41 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweSamplesImportVcf(val root: Configurable) extends Manwe { + + /** + * Import vcf for existing sample + */ + + @Argument(doc = "uri of sample to upload to") + var uri: Option[String] = _ + + @Argument(doc = "path to VCF file to upload") + var vcf: File = _ + + @Argument(doc = "flag if data is already uploaded?") // TODO: What is the use of this flag even? We're specifically uploading with this command + var alreadyUploaded: Boolean = false + + @Argument(doc = "Flag when to prefer genotype likelihoods") + var preferLikelihoods: Boolean = false + + @Argument(doc = " Flag whether to wait for import to complete on server") + var waitToComplete: Boolean = false + + def subCommand = { + required("samples") + + required("import-vcf") + + required(uri) + required(vcf) + + conditional(alreadyUploaded, "-u") + + conditional(preferLikelihoods, "-l") + + conditional(waitToComplete, "--wait") + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala new file mode 100644 index 0000000000000000000000000000000000000000..d79b85fe1048210830d532091e4d588dac9c96d4 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala @@ -0,0 +1,29 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 23-9-15. 
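+ *
+ * A minimal usage sketch (user and group URIs are placeholders); all filters are
+ * optional, so a bare instance simply lists every visible sample:
+ * {{{
+ * val list = new ManweSamplesList(this)
+ * list.user = Some("/uri/3") // placeholder user URI
+ * list.group = List("/uri/1", "/uri/2")
+ * list.onlyPublic = true
+ * list.output = new File("samples.out")
+ * add(list)
+ * }}}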
+ */ +class ManweSamplesList(val root: Configurable) extends Manwe { + + @Argument(doc = "filter by user URI", required = false) + var user: Option[String] = None + + @Argument(doc = "filter by group URI", required = false) + var group: List[String] = Nil + + var onlyPublic: Boolean = false + + def subCommand = { + required("samples") + + required("list") + + optional("-u", user) + + repeat("-g", group) + + conditional(onlyPublic, "-p") + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala new file mode 100644 index 0000000000000000000000000000000000000000..6a73f84a44c44505509fe993a6fbca92da393b66 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala @@ -0,0 +1,22 @@ +package nl.lumc.sasc.biopet.extensions.manwe + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output } + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweSamplesShow(val root: Configurable) extends Manwe { + + @Argument(doc = "The sample to show") + var uri: Option[String] = _ + + def subCommand = { + required("samples") + + required("show") + + required(uri) + } + +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala new file mode 100644 index 0000000000000000000000000000000000000000..9acd2d314b70a450d6d25239e0c45ce4f4bca82d --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CreateSequenceDictionary.scala @@ -0,0 +1,47 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ +package nl.lumc.sasc.biopet.extensions.picard + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class CreateSequenceDictionary(val root: Configurable) extends Picard { + + javaMainClass = new picard.sam.CreateSequenceDictionary().getClass.getName + + @Input(required = true) + var reference: File = _ + + @Output(required = true) + var output: File = _ + + var genomeAssembly: Option[String] = config("genomeAssembly") + var uri: Option[String] = config("uri") + var species: Option[String] = config("species") + var truncateAtWhiteSpace: Boolean = config("truncateAtWhiteSpace", default = false) + var numSequences: Option[Int] = config("numSequences") + + override def cmdLine = super.cmdLine + + required("REFERENCE=", reference, spaceSeparated = false) + + required("OUTPUT=", output, spaceSeparated = false) + + optional("GENOME_ASSEMBLY=", genomeAssembly, spaceSeparated = false) + + optional("URI=", uri, spaceSeparated = false) + + optional("SPECIES=", species, spaceSeparated = false) + + conditional(truncateAtWhiteSpace, "TRUNCATE_NAMES_AT_WHITESPACE=true") + + optional("NUM_SEQUENCES=", numSequences, spaceSeparated = false) +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala index 04a61ae0f9af329ac49252f93ccad829c60b7088..8ad769f9cd68bc530b3977684073188c07479154 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala @@ -105,11 +105,12 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { } object MarkDuplicates { /** Returns default MarkDuplicates */ - def apply(root: Configurable, input: List[File], output: File): MarkDuplicates = { + def apply(root: Configurable, input: List[File], output: File, isIntermediate: Boolean = false): MarkDuplicates = { val markDuplicates = new MarkDuplicates(root) markDuplicates.input = input markDuplicates.output = output markDuplicates.outputMetrics = new File(output.getParent, output.getName.stripSuffix(".bam") + ".metrics") + markDuplicates.isIntermediate = isIntermediate markDuplicates } } \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala index 5e90a5a625c992436a340a874bf60015c19b44ab..916256b4908a16b2216a6fd41eb853c3300e9609 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala @@ -65,12 +65,14 @@ class MergeSamFiles(val root: Configurable) extends Picard { object MergeSamFiles { /** Returns default MergeSamFiles */ - def apply(root: Configurable, input: List[File], outputDir: File, sortOrder: String = null): MergeSamFiles = { + def apply(root: Configurable, input: List[File], outputFile: File, + sortOrder: String = null, isIntermediate: Boolean = false): MergeSamFiles = { val mergeSamFiles = new MergeSamFiles(root) mergeSamFiles.input = input - mergeSamFiles.output = new File(outputDir, input.head.getName.stripSuffix(".bam").stripSuffix(".sam") + ".merge.bam") 
+ mergeSamFiles.output = outputFile if (sortOrder == null) mergeSamFiles.sortOrder = "coordinate" else mergeSamFiles.sortOrder = sortOrder + mergeSamFiles.isIntermediate = isIntermediate mergeSamFiles } } \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFaidx.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFaidx.scala new file mode 100644 index 0000000000000000000000000000000000000000..d9ce0cb80ebcbc4e0daf212a4760ffa17d1d6633 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsFaidx.scala @@ -0,0 +1,49 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.samtools + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** Extension for samtools faidx */ +class SamtoolsFaidx(val root: Configurable) extends Samtools { + @Input(doc = "Fasta file") + var input: File = _ + + @Output(doc = "output File") + private var _output: File = _ + + def output = _output + + override def beforeGraph: Unit = { + super.beforeGraph + _output = new File(input.getParentFile, input.getName + ".fai") + } + + /** Returns command to execute */ + def cmdLine = required(executable) + required("faidx") + required(input) +} + +object SamtoolsFaidx { + def apply(root: Configurable, input: File): SamtoolsFaidx = { + val faidx = new SamtoolsFaidx(root) + faidx.input = input + faidx._output = new File(input.getParentFile, input.getName + ".fai") + faidx + } +} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala index 0379c36d9ace680b7833bf226912504bb619f8e2..4fb0a05fd7e04617c15a68c63763d7e60da322d4 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala @@ -39,6 +39,8 @@ class Mpileup2cns(val root: Configurable) extends Varscan { var vcfSampleList: Option[File] = config("vcf_sample_list") var variants: Option[Int] = config("variants") + override def defaultCoreMemory = 6.0 + override def beforeGraph(): Unit = { val validValues: Set[Int] = Set(0, 1) // check for boolean vars that are passed as ints diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala index f3d8585de87216b872bee7235e364df29514710a..76da0423cea32f599c978e852ca0aa35a740137e 100644 ---
a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala @@ -15,20 +15,15 @@ */ package nl.lumc.sasc.biopet.extensions.varscan -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.{ Version, BiopetJavaCommandLineFunction } -abstract class Varscan extends BiopetJavaCommandLineFunction { +abstract class Varscan extends BiopetJavaCommandLineFunction with Version { override def subPath = "varscan" :: super.subPath jarFile = config("varscan_jar") - /** - * TODO: test version - * override def versionCommand = super.commandLine - * override val versionRegex = """VarScan v(.*)""".r - */ - - override def defaultCoreMemory = 5.0 + def versionCommand = super.commandLine + def versionRegex = """VarScan v(.*)""".r } diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..ff7ce95d59adcce87590605b5c5f8239f83af50c --- /dev/null +++ b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala @@ -0,0 +1,35 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by ahbbollen on 12-10-15. + */ +class BcfToolsTest extends TestNGSuite with Matchers with MockitoSugar { + + @Test + def BcfToolsViewTest = { + val view = new BcftoolsView(null) + + view.executable = "bcftools" + + val tmpInput = File.createTempFile("bcftoolstest", ".vcf") + tmpInput.deleteOnExit() + val tmpOutput = File.createTempFile("bcftoolstest", ".vcf.gz") + tmpOutput.deleteOnExit() + val inputPath = tmpInput.getAbsolutePath + val outputPath = tmpOutput.getAbsolutePath + + view.input = tmpInput + view.output = tmpOutput + + view.cmd should equal(s"bcftools view -l 9 -O z -o $outputPath $inputPath") + } + +} diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala index 8dceeb52502cf5b150bc3fa4e41ccdb9cb6aebae..21334014f8a4d0c085a335373067366207a815a9 100644 --- a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala +++ b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/GsnapTest.scala @@ -16,6 +16,7 @@ package nl.lumc.sasc.biopet.extensions import nl.lumc.sasc.biopet.utils.config.Config +import nl.lumc.sasc.biopet.extensions.gmap.Gsnap import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite import org.testng.SkipException diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..6d1cf3d60c7f2b0d995b9b82b59a1391d37162db --- /dev/null +++ b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala @@ -0,0 +1,372 @@ +package nl.lumc.sasc.biopet.extensions + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.manwe._ +import nl.lumc.sasc.biopet.utils.config.Config 
+import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +import scala.io.Source + +/** + * Created by ahbbollen on 24-9-15. + */ +class ManweTest extends TestNGSuite with Matchers { + + @Test + def testManweAnnotatedBed = { + val manwe = new ManweAnnotateBed(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + val bed = File.createTempFile("manwe", "bed") + out.deleteOnExit() + bed.deleteOnExit() + + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.bed = bed + manwe.alreadyUploaded = false + manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.queries = List("/uri/1&&/uri/2") + manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.alreadyUploaded = true + manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -u -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.waitToComplete = true + manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -u -q /uri/1&&/uri/2 --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweAnnotateVcf = { + val manwe = new ManweAnnotateVcf(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + val vcf = File.createTempFile("manwe", "vcf") + out.deleteOnExit() + vcf.deleteOnExit() + + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.vcf = vcf + manwe.alreadyUploaded = false + manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.queries = List("/uri/1&&/uri/2") + manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.alreadyUploaded = true + manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -u -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.waitToComplete = true + manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -u -q /uri/1&&/uri/2 --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweDataSourcesAnnotate = { + val manwe = new ManweDataSourcesAnnotate(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.uri = Some("/uri/1") + manwe.cmd should equal(s"manwe data-sources annotate /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.queries = List("/uri/1&&/uri/2") + manwe.cmd should equal(s"manwe data-sources annotate /uri/1 -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.waitToComplete = true + manwe.cmd should equal(s"manwe data-sources annotate /uri/1 -q /uri/1&&/uri/2 --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweDataSourcesDownload = { + val manwe = new ManweDataSourcesDownload(null) 
{ + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + + manwe.output = out + manwe.uri = "/uri/1" + manwe.cmd should equal(s"manwe data-sources download /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweDataSourcesList = { + val manwe = new ManweDataSourcesList(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.cmd should equal(s"manwe data-sources list -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweDataSourcesShow = { + val manwe = new ManweDataSourcesShow(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.uri = Some("/uri/1") + manwe.cmd should equal(s"manwe data-sources show /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesActivate = { + val manwe = new ManweSamplesActivate(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.uri = "/uri/1" + manwe.cmd should equal(s"manwe samples activate /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesAdd = { + val manwe = new ManweSamplesAdd(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.name = Some("pietje") + manwe.cmd should equal(s"manwe samples add pietje -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.group = List("/uri/1", "/uri/2") + manwe.cmd should equal(s"manwe samples add pietje -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.poolSize = Some(3) + manwe.cmd should equal(s"manwe samples add pietje -s 3 -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesAnnotateVariations = { + val manwe = new ManweSamplesAnnotateVariations(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.uri = Some("/uri/1") + manwe.cmd should equal(s"manwe samples annotate-variations /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.queries = List("/uri/1&&/uri/2", "/uri/3") + manwe.cmd should equal(s"manwe samples annotate-variations /uri/1 -q /uri/1&&/uri/2 -q /uri/3 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesImport = { + val manwe = new ManweSamplesImport(null) { + override def globalConfig = new Config(Map("manwe_config" -> 
"${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.name = Some("pietje") + manwe.cmd should equal(s"manwe samples import pietje -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.group = List("/uri/1&&/uri/2", "/uri/3") + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + val vcfs: List[File] = (0 until 4).map(_ => File.createTempFile("manwe", "test")).toList + val beds: List[File] = (0 until 4).map(_ => File.createTempFile("manwe", "test")).toList + vcfs.foreach(x => x.deleteOnExit()) + beds.foreach(x => x.deleteOnExit()) + manwe.vcfs = vcfs + manwe.beds = beds + + val vcfLine = vcfs.foldLeft("")((r, f) => r + s"--vcf ${f.getAbsolutePath} ").trim + val bedLine = beds.foldLeft("")((r, f) => r + s"--bed ${f.getAbsolutePath} ").trim + + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.poolSize = Some(4) + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.alreadyUploaded = true + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.public = true + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.preferLikelihood = true + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -l -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.noCoverage = true + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -l --no-coverage-profile -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.waitToComplete = true + manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -l --no-coverage-profile --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesImportBed = { + + val manwe = new ManweSamplesImportBed(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + + val bed = File.createTempFile("manwe", "test") + bed.deleteOnExit() + manwe.bed = bed + + manwe.uri = Some("/uri/1") + manwe.cmd should equal(s"manwe samples import-bed /uri/1 ${bed.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.alreadyUploaded = true + manwe.cmd should equal(s"manwe samples import-bed /uri/1 ${bed.getAbsolutePath} -u -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.waitToComplete = true + manwe.cmd should equal(s"manwe samples import-bed /uri/1 ${bed.getAbsolutePath} -u --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesImportVcf = { + val manwe = new 
ManweSamplesImportVcf(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + + val vcf = File.createTempFile("manwe", "test") + vcf.deleteOnExit() + manwe.vcf = vcf + + manwe.uri = Some("/uri/1") + manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.alreadyUploaded = true + manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -u -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.preferLikelihoods = true + manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -u -l -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.waitToComplete = true + manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -u -l --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesList = { + val manwe = new ManweSamplesList(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + + manwe.cmd should equal(s"manwe samples list -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.group = List("/uri/1", "/uri/2") + manwe.cmd should equal(s"manwe samples list -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.user = Some("/uri/3") + manwe.cmd should equal(s"manwe samples list -u /uri/3 -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + + manwe.onlyPublic = true + manwe.cmd should equal(s"manwe samples list -u /uri/3 -g /uri/1 -g /uri/2 -p -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test + def testManweSamplesShow = { + val manwe = new ManweSamplesShow(null) { + override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) + } + + val out = File.createTempFile("manwe", "test") + out.deleteOnExit() + manwe.manweConfig = out.getParentFile + manwe.output = out + manwe.uri = Some("/uri/1") + + manwe.cmd should equal(s"manwe samples show /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}") + } + + @Test def testConfigCreation = { + val manwe = new ManweAnnotateBed(null) { + override def globalConfig = new Config(Map( + "varda_root" -> "http://127.0.0.1:5000", + "varda_token" -> "QWERTYUIOPASDFGHJKLZXCVBNM", + "varda_cache_size" -> 25, + "varda_buffer_size" -> 200, + "varda_task_poll_wait" -> 5, + "varda_verify_certificate" -> true)) + } + + val file: File = manwe.createManweConfig(None) + val contents = Source.fromFile(file).getLines().toList + + val supposedContent = List("API_ROOT = 'http://127.0.0.1:5000'", + "TOKEN = 'QWERTYUIOPASDFGHJKLZXCVBNM'", + "VERIFY_CERTIFICATE = True", + "COLLECTION_CACHE_SIZE = 25", + "DATA_BUFFER_SIZE = 200", + "TASK_POLL_WAIT = 5" + ) + + supposedContent.sorted should equal(contents.sorted) + + val manwe2 = new ManweAnnotateBed(null) { + override def globalConfig = new Config(Map( + "varda_root" -> "http://127.0.0.1:5000", + "varda_token" -> "QWERTYUIOPASDFGHJKLZXCVBNM", + "varda_cache_size" -> 25, + "varda_buffer_size" -> 200, + 
"varda_task_poll_wait" -> 5, + "varda_verify_certificate" -> "/a/b/c/d.crt")) + } + + val file2: File = manwe2.createManweConfig(None) + val contents2 = Source.fromFile(file2).getLines().toList + val supposedContent2 = List("API_ROOT = 'http://127.0.0.1:5000'", + "TOKEN = 'QWERTYUIOPASDFGHJKLZXCVBNM'", + "VERIFY_CERTIFICATE = '/a/b/c/d.crt'", + "COLLECTION_CACHE_SIZE = 25", + "DATA_BUFFER_SIZE = 200", + "TASK_POLL_WAIT = 5" + ) + + supposedContent2.sorted should equal(contents2.sorted) + } + +} diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala index a7e1ea63f9a07ccdaf62ebd753b8ee5d46b94e27..3d6276f50fafb7252033a852018f96d1bbfd7dac 100644 --- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala +++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala @@ -22,6 +22,7 @@ object BiopetExecutablePublic extends BiopetExecutable { def publicPipelines: List[MainCommand] = List( nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep, nl.lumc.sasc.biopet.pipelines.mapping.Mapping, + nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMapping, nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap, nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics, nl.lumc.sasc.biopet.pipelines.sage.Sage, diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala new file mode 100644 index 0000000000000000000000000000000000000000..3ad955d35f0768dcc1a056c999778cb8c6b2e205 --- /dev/null +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala @@ -0,0 +1,41 @@ +package nl.lumc.sasc.biopet.extensions.tools + +import java.io.File + +import nl.lumc.sasc.biopet.core.ToolCommandFunction +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input } + +/** + * Created by ahbbollen on 13-10-15. 
+ */ +class GvcfToBed(val root: Configurable) extends ToolCommandFunction { + def toolObject = nl.lumc.sasc.biopet.tools.GvcfToBed + + @Input(doc = "input vcf") + var inputVcf: File = _ + + @Output(doc = "output bed") + var outputBed: File = _ + + @Argument(doc = "sample", required = false) + var sample: Option[String] = None + + @Argument(doc = "minquality", required = false) + var minQuality: Int = 0 + + @Argument(doc = "inverse", required = false) + var inverse: Boolean = false + + override def defaultCoreMemory = 4.0 + + override def cmdLine = { + super.cmdLine + + required("-I", inputVcf) + + required("-O", outputBed) + + optional("-S", sample) + + optional("--minGenomeQuality", minQuality) + + conditional(inverse, "--inverted") + } + +} diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala index df131db588a2709d238213968c18aaa86ffb3962..fd43743c2e249d9f34ab012a1dc941f6736b295d 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala @@ -37,6 +37,7 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction { var minTotalDepth: Option[Int] = config("min_total_depth") var minAlternateDepth: Option[Int] = config("min_alternate_depth") var minSamplesPass: Option[Int] = config("min_samples_pass") + var minGenomeQuality: Option[Int] = config("min_genome_quality") var filterRefCalls: Boolean = config("filter_ref_calls", default = false) override def defaultCoreMemory = 3.0 @@ -53,5 +54,6 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction { optional("--minTotalDepth", minTotalDepth) + optional("--minAlternateDepth", minAlternateDepth) + optional("--minSamplesPass", minSamplesPass) + + optional("--minGenomeQuality", minGenomeQuality) + conditional(filterRefCalls, "--filterRefCalls") } diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala new file mode 100644 index 0000000000000000000000000000000000000000..f96835488102e2a7c7a43d153a5986faa5e158f7 --- /dev/null +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala @@ -0,0 +1,108 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.{ File, PrintWriter } + +import htsjdk.variant.variantcontext.VariantContext +import htsjdk.variant.vcf.VCFFileReader +import nl.lumc.sasc.biopet.utils.{ VcfUtils, ToolCommand } +import nl.lumc.sasc.biopet.utils.intervals.BedRecord + +import scala.collection.JavaConversions._ + +/** + * Created by ahbbollen on 13-10-15. 
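// Illustrative sketch, not part of the patch: how the GvcfToBed tool extension above could be wired into a Biopet QScript. The file names, sample ID and GQ cut-off are placeholder assumptions; `add` is the usual QScript method for scheduling a job.
//
//   val gvcfToBed = new GvcfToBed(this)
//   gvcfToBed.inputVcf = new File("sample01.g.vcf.gz")
//   gvcfToBed.outputBed = new File("sample01.gq20.bed")
//   gvcfToBed.sample = Some("sample01")
//   gvcfToBed.minQuality = 20
//   add(gvcfToBed)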
+ * Create bed track from genome quality values in (g)VCF + */ +object GvcfToBed extends ToolCommand { + + case class Args(inputVcf: File = null, + outputBed: File = null, + invertedOutputBed: Option[File] = None, + sample: Option[String] = None, + minGenomeQuality: Int = 0, + inverse: Boolean = false) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => + c.copy(inputVcf = x) + } text "Input vcf file" + opt[File]('O', "outputBed") required () maxOccurs 1 valueName "<file>" action { (x, c) => + c.copy(outputBed = x) + } text "Output bed file" + opt[File]("invertedOutputBed") maxOccurs 1 valueName "<file>" action { (x, c) => + c.copy(invertedOutputBed = Some(x)) + } text "Output bed file for regions failing the minimum genome quality" + opt[String]('S', "sample") unbounded () maxOccurs 1 valueName "<sample>" action { (x, c) => + c.copy(sample = Some(x)) + } text "Sample to consider. Will take the first sample in alphabetical order by default" + opt[Int]("minGenomeQuality") unbounded () maxOccurs 1 valueName "<int>" action { (x, c) => + c.copy(minGenomeQuality = x) + } text "Minimum genome quality to consider" + } + + def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1) + + logger.debug("Opening reader") + val reader = new VCFFileReader(cmdArgs.inputVcf, false) + logger.debug("Opening writer") + val writer = new PrintWriter(cmdArgs.outputBed) + val invertedWriter = cmdArgs.invertedOutputBed.collect { + case file => + logger.debug("Opening inverted writer") + new PrintWriter(file) + } + + val sample = cmdArgs.sample.getOrElse(reader.getFileHeader.getSampleNamesInOrder.head) + + val it = reader.iterator() + val firstRecord = it.next() + var contig = firstRecord.getContig + var start = firstRecord.getStart + var end = firstRecord.getEnd + var pass = VcfUtils.hasMinGenomeQuality(firstRecord, sample, cmdArgs.minGenomeQuality) + + def writeResetCachedRecord(newRecord: VariantContext): Unit = { + writeCachedRecord() + contig = newRecord.getContig + start = newRecord.getStart + end = newRecord.getEnd + pass = VcfUtils.hasMinGenomeQuality(newRecord, sample, cmdArgs.minGenomeQuality) + } + + def writeCachedRecord(): Unit = { + if (pass) writer.println(new BedRecord(contig, start - 1, end)) + else invertedWriter.foreach(_.println(new BedRecord(contig, start - 1, end))) + } + + var counter = 1 + logger.info("Start") + for (r <- it) { + if (contig == r.getContig) { + val p = VcfUtils.hasMinGenomeQuality(r, sample, cmdArgs.minGenomeQuality) + if (p != pass || r.getStart > (end + 1)) writeResetCachedRecord(r) + else end = r.getEnd + } else writeResetCachedRecord(r) + + counter += 1 + if (counter % 100000 == 0) { + logger.info(s"Processed $counter records") + } + } + writeCachedRecord() + + logger.info(s"Processed $counter records") + + logger.debug("Closing writer") + writer.close() + invertedWriter.foreach { w => + logger.debug("Closing inverted writer") + w.close() + } + logger.debug("Closing reader") + reader.close() + + logger.info("Finished!") + } +} diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index 2799fa38b78b6d2acb123b80d5ae5a3bdde3bf3a..c72222d9fc3278f396c7bdd7b5dff645cf4f2620 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++
b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -54,7 +54,8 @@ object VcfFilter extends ToolCommand { filterHetVarToHomVar: List[(String, String)] = Nil, filterRefCalls: Boolean = false, filterNoCalls: Boolean = false, - iDset: Set[String] = Set()) extends AbstractArgs + iDset: Set[String] = Set(), + minGenomeQuality: Int = 0) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => @@ -128,6 +129,9 @@ object VcfFilter extends ToolCommand { opt[File]("idFile") unbounded () action { (x, c) => c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines()) } text "File that contain list of IDs to get from vcf file" + opt[Int]("minGenomeQuality") unbounded () action { (x, c) => + c.copy(minGenomeQuality = x) + } } /** @param args the command line arguments */ @@ -161,6 +165,7 @@ object VcfFilter extends ToolCommand { hasMinTotalDepth(record, cmdArgs.minTotalDepth) && hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) && minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) && + minGenomeQuality(record, cmdArgs.minGenomeQuality, cmdArgs.minSamplesPass) && (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) && calledIn(record, cmdArgs.calledIn) && hasGenotype(record, cmdArgs.mustHaveGenotype) && @@ -263,6 +268,18 @@ object VcfFilter extends ToolCommand { }) >= minSamplesPass } + /** + * Checks if genome quality field has minimum value + * @param record VCF record + * @param minGQ smallest GQ allowed + * @param minSamplesPass number of samples to consider + * @return + */ + def minGenomeQuality(record: VariantContext, minGQ: Int, minSamplesPass: Int = 1): Boolean = { + record.getGenotypes.count(x => if (!x.hasGQ) false + else if (x.getGQ >= minGQ) true else false) >= minSamplesPass + } + /** * Checks if given samples does have a variant hin this record * @param record VCF record diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..5664c9c6fe306a3d845fd1cbe1bf9ded22f153f3 --- /dev/null +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala @@ -0,0 +1,42 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import htsjdk.variant.vcf.VCFFileReader +import nl.lumc.sasc.biopet.utils.VcfUtils +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite + +import GvcfToBed._ +import org.testng.annotations.Test + +/** + * Created by ahbbollen on 13-10-15. 
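// Illustrative sketch, not part of the patch: the new minGenomeQuality filter added to VcfFilter above passes a record when at least `minSamplesPass` genotypes reach the given GQ. `record` stands for any htsjdk VariantContext; the numbers are placeholder assumptions.
//
//   VcfFilter.minGenomeQuality(record, minGQ = 20, minSamplesPass = 2)
//   // with scala.collection.JavaConversions._ in scope (as in the tool), this is equivalent to:
//   record.getGenotypes.count(g => g.hasGQ && g.getGQ >= 20) >= 2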
+ */ +class GvcfToBedTest extends TestNGSuite with Matchers with MockitoSugar { + + private def resourcePath(p: String): String = { + Paths.get(getClass.getResource(p).toURI).toString + } + + val vcf3 = new File(resourcePath("/VCFv3.vcf")) + val vepped = new File(resourcePath("/VEP_oneline.vcf")) + val unvepped = new File(resourcePath("/unvepped.vcf")) + + val vepped_path = resourcePath("/VEP_oneline.vcf") + + @Test def testMinQuality = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + VcfUtils.hasMinGenomeQuality(record, "Sample_101", 99) shouldBe true + + val reader2 = new VCFFileReader(unvepped, false) + val record2 = reader2.iterator.next() + + VcfUtils.hasMinGenomeQuality(record2, "Sample_102", 3) shouldBe true + VcfUtils.hasMinGenomeQuality(record2, "Sample_102", 99) shouldBe false + } +} diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala index 7149ab194d03e1afd40ea99b39b3b39674b2533d..4b3796ee4a3da1fe7bf9635b6d65914b812d640f 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala @@ -144,6 +144,15 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { minAlternateDepth(record, 20, 2) shouldBe false } + @Test def testHasMinGQ() = { + val reader = new VCFFileReader(vepped, false) + val record = reader.iterator().next() + + minGenomeQuality(record, 99, 1) shouldBe true + minGenomeQuality(record, 99, 2) shouldBe true + minGenomeQuality(record, 99, 3) shouldBe true + } + @Test def testMustHaveVariant() = { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala index f13230534af0a61fa29f68daa8d48c81a55e1f23..d0c185cf1a837c816404c2c160bc8f6428e1cc19 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -25,7 +25,8 @@ object BamUtils { val inputSam = SamReaderFactory.makeDefault.open(file) val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct if (samples.size == 1) samples.head -> file - else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) + else if (samples.size > 1) throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) + else throw new IllegalArgumentException("Bam does not contain a sample ID or has no read groups defined: " + file) } if (temp.map(_._1).distinct.size != temp.size) throw new IllegalArgumentException("Samples has been found twice") temp.toMap diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala index 93a43e273f1a8aa0be87fc9fe3eeb44e9c0b9067..9d4b9dc2bf84bcda084ce5de9647620e1a130658 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/Logging.scala @@ -36,25 +36,26 @@ trait Logging { object Logging { val logger = Logger.getRootLogger - private val errors: ListBuffer[Exception] = ListBuffer() + private[biopet] val errors: ListBuffer[Exception] = ListBuffer() def
addError(error: String, debug: String = null): Unit = { val msg = error + (if (debug != null && logger.isDebugEnabled) "; " + debug else "") errors.append(new Exception(msg)) } - def checkErrors(): Unit = { + def checkErrors(debug: Boolean = false): Unit = { if (errors.nonEmpty) { logger.error("*************************") logger.error("Biopet found some errors:") - if (logger.isDebugEnabled) { + if (debug || logger.isDebugEnabled) { for (e <- errors) { logger.error(e.getMessage) - logger.debug(e.getStackTrace.mkString("Stack trace:\n", "\n", "\n")) + logger.error(e.getStackTrace.mkString("Stack trace:\n", "\n", "\n")) } } else { errors.map(_.getMessage).sorted.distinct.foreach(logger.error(_)) } + errors.clear() throw new IllegalStateException("Biopet found errors") } } diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala index 8e375f4e7e35cbb49c9cc90c688753b2b6ca42ea..e724575ac08dd288e9ff5ae08237bdb0cb208657 100644 --- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala @@ -15,9 +15,11 @@ */ package nl.lumc.sasc.biopet.utils +import java.io.File import java.util -import htsjdk.variant.variantcontext.VariantContext +import htsjdk.variant.variantcontext.{ Genotype, VariantContext } +import htsjdk.variant.vcf.{ VCFFileReader, VCFHeader, VCFFilterHeaderLine } import scala.collection.JavaConversions._ @@ -80,4 +82,49 @@ object VcfUtils { var1.getEnd == var2.getEnd && var1.getAttributes == var2.getAttributes } + + /** + * Return true if header is a block-type GVCF file + * @param header header of Vcf file + * @return boolean + */ + def isBlockGVcf(header: VCFHeader): Boolean = { + header.getMetaDataLine("GVCFBlock") != null + } + + /** + * Get sample IDs from vcf File + * @param vcf File object pointing to vcf + * @return list of strings with sample IDs + */ + def getSampleIds(vcf: File): List[String] = { + val reader = new VCFFileReader(vcf, false) + val samples = reader.getFileHeader.getSampleNamesInOrder.toList + reader.close() + samples + } + + /** + * Check whether record has minimum genome Quality + * @param record variant context + * @param sample sample name + * @param minGQ minimum genome quality value + * @return + */ + def hasMinGenomeQuality(record: VariantContext, sample: String, minGQ: Int): Boolean = { + if (!record.getSampleNamesOrderedByName.contains(sample)) + throw new IllegalArgumentException("Sample does not exist") + val gt = record.getGenotype(sample) + hasMinGenomeQuality(gt, minGQ) + } + + /** + * Check whether genotype has minimum genome Quality + * @param gt Genotype + * @param minGQ minimum genome quality value + * @return + */ + def hasMinGenomeQuality(gt: Genotype, minGQ: Int): Boolean = { + gt.hasGQ && gt.getGQ >= minGQ + } } diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index a31ca43355b72f09d219340725e23d6a0aca2eba..17e358d2d86f870f983fe692df9c02475aabf5e4 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -18,16 +18,14 @@ package nl.lumc.sasc.biopet.pipelines.carp import java.io.File import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView -import 
nl.lumc.sasc.biopet.utils.config._ -import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.extensions.Ln +import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension import nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak -import nl.lumc.sasc.biopet.extensions.picard.{ BuildBamIndex, MergeSamFiles } +import nl.lumc.sasc.biopet.extensions.picard.BuildBamIndex +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait +import nl.lumc.sasc.biopet.utils.config._ import org.broadinstitute.gatk.queue.QScript /** @@ -35,7 +33,7 @@ import org.broadinstitute.gatk.queue.QScript * Chip-Seq analysis pipeline * This pipeline performs QC,mapping and peak calling */ -class Carp(val root: Configurable) extends QScript with MultiSampleQScript with SummaryQScript with Reference { +class Carp(val root: Configurable) extends QScript with MultisampleMappingTrait with Reference { qscript => def this() = this(null) @@ -44,6 +42,7 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with "skip_markduplicates" -> false, "aligner" -> "bwa-mem" ), + "merge_strategy" -> "preprocessmergesam", "samtoolsview" -> Map("q" -> 10) ) @@ -56,136 +55,70 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with def summaryFile = new File(outputDir, "Carp.summary.json") - //TODO: Add summary - def summaryFiles = Map("reference" -> referenceFasta()) - - //TODO: Add summary - def summarySettings = Map("reference" -> referenceSummary) - - def makeSample(id: String) = new Sample(id) - class Sample(sampleId: String) extends AbstractSample(sampleId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - def makeLibrary(id: String) = new Library(id) - class Library(libId: String) extends AbstractLibrary(libId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() + override def makeSample(id: String) = new Sample(id) + class Sample(sampleId: String) extends super.Sample(sampleId) { - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - val mapping = new Mapping(qscript) - mapping.libId = Some(libId) - mapping.sampleId = Some(sampleId) - mapping.outputDir = libDir - - def addJobs(): Unit = { - if (config.contains("R1")) { - mapping.input_R1 = config("R1") - if (config.contains("R2")) mapping.input_R2 = config("R2") - - inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5")) - mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5"))) - - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) - - } else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libId) - - addSummaryQScript(mapping) - } - } + override def preProcessBam = Some(createFile(".filter.bam")) - val bamFile = createFile(".bam") - val bamFileFilter = createFile(".filter.bam") val controls: List[String] = config("control", default = Nil) - def addJobs(): Unit = { - addPerLibJobs() - val bamFiles = libraries.map(_._2.mapping.finalBamFile).toList - if (bamFiles.length == 1) { - add(Ln(qscript, bamFiles.head, bamFile)) - val oldIndex = new File(bamFiles.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex = new 
File(bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") - add(Ln(qscript, oldIndex, newIndex)) - } else if (bamFiles.length > 1) { - val merge = new MergeSamFiles(qscript) - merge.input = bamFiles - merge.sortOrder = "coordinate" - merge.output = bamFile - add(merge) - } - - val bamMetrics = BamMetrics(qscript, bamFile, new File(sampleDir, "metrics"), sampleId = Some(sampleId)) - addAll(bamMetrics.functions) - addSummaryQScript(bamMetrics) + override def summarySettings = super.summarySettings ++ Map("controls" -> controls) - val bamMetricsFilter = BamMetrics(qscript, bamFileFilter, new File(sampleDir, "metrics-filter"), sampleId = Some(sampleId)) - addAll(bamMetricsFilter.functions) - bamMetricsFilter.summaryName = "bammetrics-filter" - addSummaryQScript(bamMetricsFilter) + override def addJobs(): Unit = { + super.addJobs() - addAll(Bam2Wig(qscript, bamFile).functions) - addAll(Bam2Wig(qscript, bamFileFilter).functions) + add(Bam2Wig(qscript, bamFile.get)) val samtoolsView = new SamtoolsView(qscript) - samtoolsView.input = bamFile - samtoolsView.output = bamFileFilter + samtoolsView.input = bamFile.get + samtoolsView.output = preProcessBam.get samtoolsView.b = true samtoolsView.h = true add(samtoolsView) + val bamMetricsFilter = BamMetrics(qscript, preProcessBam.get, new File(sampleDir, "metrics-filter"), sampleId = Some(sampleId)) + addAll(bamMetricsFilter.functions) + bamMetricsFilter.summaryName = "bammetrics-filter" + addSummaryQScript(bamMetricsFilter) + + add(Bam2Wig(qscript, preProcessBam.get)) + val buildBamIndex = new BuildBamIndex(qscript) - buildBamIndex.input = bamFileFilter - buildBamIndex.output = swapExt(bamFileFilter.getParent, bamFileFilter, ".bam", ".bai") + buildBamIndex.input = preProcessBam.get + buildBamIndex.output = swapExt(preProcessBam.get.getParentFile, preProcessBam.get, ".bam", ".bai") add(buildBamIndex) val macs2 = new Macs2CallPeak(qscript) - macs2.treatment = bamFileFilter + macs2.treatment = preProcessBam.get macs2.name = Some(sampleId) macs2.outputdir = sampleDir + File.separator + "macs2" + File.separator + sampleId + File.separator add(macs2) } } - override def reportClass = { + override def reportClass: Option[ReportBuilderExtension] = { val carp = new CarpReport(this) carp.outputDir = new File(outputDir, "report") carp.summaryFile = summaryFile Some(carp) } - def init() = { + override def init() = { + super.init() // ensure that no samples are called 'control' since that is our reserved keyword require(!sampleIds.contains("control"), "No sample should be named 'control' since it is a reserved for the Carp pipeline") } - def biopetScript() { - // Define what the pipeline should do - // First step is QC, this will be done with Flexiprep - // Second step is mapping, this will be done with the Mapping pipeline - // Third step is calling peaks on the bam files produced with the mapping pipeline, this will be done with MACS2 - logger.info("Starting CArP pipeline") - - addSamplesJobs() - - addSummaryJobs() - } - - def addMultiSampleJobs(): Unit = { + override def addMultiSampleJobs(): Unit = { + super.addMultiSampleJobs() for ((sampleId, sample) <- samples) { for (controlId <- sample.controls) { if (!samples.contains(controlId)) throw new IllegalStateException("For sample: " + sampleId + " this control: " + controlId + " does not exist") val macs2 = new Macs2CallPeak(this) - macs2.treatment = sample.bamFileFilter - macs2.control = samples(controlId).bamFileFilter + macs2.treatment = sample.preProcessBam.get + macs2.control = 
samples(controlId).preProcessBam.get macs2.name = Some(sampleId + "_VS_" + controlId) macs2.outputdir = sample.sampleDir + File.separator + "macs2" + File.separator + macs2.name.get + File.separator add(macs2) diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala index a06cef16f169732aba3ebe31357f907d24ac2eca..756d1e33d32f8008ed5c06fc0f8d2595c15d3b76 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/CarpReport.scala @@ -15,10 +15,9 @@ */ package nl.lumc.sasc.biopet.pipelines.carp +import nl.lumc.sasc.biopet.core.report.{ ReportSection, ReportBuilderExtension } +import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingReportTrait import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection, ReportPage, MultisampleReportBuilder } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport -import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport /** * Class for report for CArp @@ -29,71 +28,10 @@ class CarpReport(val root: Configurable) extends ReportBuilderExtension { def builder = CarpReport } -object CarpReport extends MultisampleReportBuilder { - - /** Root page for the carp report */ - def indexPage = { - //Source.fromInputStream(getClass.getResourceAsStream("/nl/lumc/sasc/biopet/pipelines/carp/carpFont.ssp")).foreach(print(_)) - ReportPage( - List("Samples" -> generateSamplesPage(pageArgs)) ++ - Map("Files" -> filesPage, - "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp" - )), Map()) - ), - List( - "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/carp/carpFront.ssp"), - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false) - ), - "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)) - ), - pageArgs - ) - } - - /** Files page, can be used general or at sample level */ - def filesPage: ReportPage = ReportPage(List(), List( - "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), - "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), - "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)) //, - //"Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", - // Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam")) - ), Map()) - - /** Single sample page */ - def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { - ReportPage(List( - "Libraries" -> 
generateLibraryPage(args), - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None), - "Files" -> filesPage - ), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", - if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()), - "Merged" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true)), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) - } - - /** Library page */ - def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { - ReportPage(List( - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)), - "QC" -> FlexiprepReport.flexiprepPage - ), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) - } - +object CarpReport extends MultisampleMappingReportTrait { /** Name of the report */ def reportName = "Carp Report" + override def frontSection = ReportSection("/nl/lumc/sasc/biopet/pipelines/carp/carpFront.ssp") + + override def pipelineName = "carp" } \ No newline at end of file diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index b34b3772296f9de419f7a249d45daefc513c7259..f974f8c9a43f685390ee0c510ffa0064d07167b5 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -46,16 +46,16 @@ class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.e // adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter // sequences come from FastQC case _ => - throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastq_input'.") + throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastqInput'.") } // FastQC found no adapters case otherwise => ; - logger.debug(s"No adapters found for summarizing in '$fastq_input'.") + logger.debug(s"No adapters found for summarizing in '$fastqInput'.") None } // "adapters" key not found ~ something went wrong in our part - case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastq_input'.") + case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastqInput'.") } initStats.updated(adaptersStatsName, adapterCounts) } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala index 22a9a4a526a0a8d0c640b9262baef99063f07288..390d68b58e5335305e836effa745bc36c9c5b8fa 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala @@ -102,12 +102,12 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends 
BiopetComman val foundAdapters = fastqc.foundAdapters.map(_.seq) if (foundAdapters.nonEmpty) { val cutadapt = new Cutadapt(root, fastqc) - cutadapt.fastq_input = seqtk.output - cutadapt.fastq_output = new File(output.getParentFile, input.getName + ".cutadapt.fq") - cutadapt.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats") - if (cutadapt.default_clip_mode == "3") cutadapt.opt_adapter ++= foundAdapters - else if (cutadapt.default_clip_mode == "5") cutadapt.opt_front ++= foundAdapters - else if (cutadapt.default_clip_mode == "both") cutadapt.opt_anywhere ++= foundAdapters + cutadapt.fastqInput = seqtk.output + cutadapt.fastqOutput = new File(output.getParentFile, input.getName + ".cutadapt.fq") + cutadapt.statsOutput = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats") + if (cutadapt.defaultClipMode == "3") cutadapt.adapter ++= foundAdapters + else if (cutadapt.defaultClipMode == "5") cutadapt.front ++= foundAdapters + else if (cutadapt.defaultClipMode == "both") cutadapt.anywhere ++= foundAdapters addPipeJob(cutadapt) Some(cutadapt) } else None @@ -117,7 +117,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman val sickle = new Sickle(root) sickle.output_stats = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.trim.stats") sickle.input_R1 = clip match { - case Some(c) => c.fastq_output + case Some(c) => c.fastqOutput case _ => seqtk.output } sickle.output_R1 = new File(output.getParentFile, input.getName + ".sickle.fq") @@ -127,7 +127,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman val outputFile = (clip, trim) match { case (_, Some(t)) => t.output_R1 - case (Some(c), _) => c.fastq_output + case (Some(c), _) => c.fastqOutput case _ => seqtk.output } diff --git a/public/generate-indexes/pom.xml b/public/generate-indexes/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..5e748e8636e83a1e5f49d613e9f34bfb8c1fb806 --- /dev/null +++ b/public/generate-indexes/pom.xml @@ -0,0 +1,50 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Biopet is built on top of GATK Queue for building bioinformatic + pipelines. It is mainly intended to support LUMC SHARK cluster which is running + SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + should also be able to execute Biopet tools and pipelines. + + Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + + Contact us at: sasc@lumc.nl + + A dual licensing mode is applied. The source code within this project that are + not part of GATK Queue is freely available for non-commercial use under an AGPL + license; For commercial users or users who do not want to follow the AGPL + license, please contact us to obtain a separate license. 
+ +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <artifactId>GenerateIndexes</artifactId> + <packaging>jar</packaging> + + <parent> + <groupId>nl.lumc.sasc</groupId> + <artifactId>Biopet</artifactId> + <version>0.6.0-SNAPSHOT</version> + <relativePath>../</relativePath> + </parent> + + <inceptionYear>2014</inceptionYear> + <name>GenerateIndexes</name> + + <dependencies> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>BiopetCore</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>nl.lumc.sasc</groupId> + <artifactId>BiopetExtensions</artifactId> + <version>${project.version}</version> + </dependency> + </dependencies> + +</project> \ No newline at end of file diff --git a/public/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala b/public/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala new file mode 100644 index 0000000000000000000000000000000000000000..6e2aa683f8e6e2abe31e2e8307d71db8c41c5258 --- /dev/null +++ b/public/generate-indexes/src/main/scala/nl/lumc/sasc/biopet/pipelines/GenerateIndexes.scala @@ -0,0 +1,268 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
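// Illustrative sketch, not part of the patch: the shape of the reference JSON that GenerateIndexes reads via its referenceConfigFile argument, inferred from biopetScript() below. The URIs are placeholder assumptions; "fasta_uri" is required (and may also be a list of URIs), while "vep_cache_uri" and "dbsnp_vcf_uri" are optional.
//
//   {
//     "Homo_sapiens": {
//       "GRCh38": {
//         "fasta_uri": "ftp://example.org/GRCh38.fa.gz",
//         "vep_cache_uri": "ftp://example.org/homo_sapiens_vep_82_GRCh38.tar.gz",
//         "dbsnp_vcf_uri": "ftp://example.org/dbsnp_latest.vcf.gz"
//       }
//     }
//   }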
+ */ +package nl.lumc.sasc.biopet.pipelines + +import java.io.PrintWriter +import java.util + +import nl.lumc.sasc.biopet.core.extensions.Md5sum +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, BiopetQScript, PipelineCommand } +import nl.lumc.sasc.biopet.extensions._ +import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2Build, BowtieBuild } +import nl.lumc.sasc.biopet.extensions.bwa.BwaIndex +import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.gmap.GmapBuild +import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx +import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.QScript +import scala.language.reflectiveCalls + +import scala.collection.JavaConversions._ + +class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript { + def this() = this(null) + + @Argument + var referenceConfigFile: File = _ + + var referenceConfig: Map[String, Any] = Map() + + var configDeps: List[File] = Nil + + def outputConfigFile = new File(outputDir, "reference.json") + + /** This is executed before the script starts */ + def init(): Unit = { + referenceConfig = ConfigUtils.fileToConfigMap(referenceConfigFile) + } + + /** Method where jobs must be added */ + def biopetScript(): Unit = { + + val outputConfig = for ((speciesName, c) <- referenceConfig) yield speciesName -> { + val speciesConfig = ConfigUtils.any2map(c) + val speciesDir = new File(outputDir, speciesName) + for ((genomeName, c) <- speciesConfig) yield genomeName -> { + val genomeConfig = ConfigUtils.any2map(c) + val fastaUris = genomeConfig.getOrElse("fasta_uri", + throw new IllegalArgumentException(s"No fasta_uri found for $speciesName - $genomeName")) match { + case a: Array[_] => a.map(_.toString) + case a => Array(a.toString) + } + + val genomeDir = new File(speciesDir, genomeName) + val fastaFile = new File(genomeDir, "reference.fa") + var outputConfig: Map[String, Any] = Map("reference_fasta" -> fastaFile) + + val fastaFiles = for (fastaUri <- fastaUris) yield { + val curl = new Curl(this) + curl.url = fastaUri + curl.output = if (fastaUris.length > 1 || fastaUri.endsWith(".gz")) { + curl.isIntermediate = true + new File(genomeDir, new File(fastaUri).getName) + } else fastaFile + + add(curl) + add(Md5sum(this, curl.output, genomeDir)) + configDeps :+= curl.output + curl.output + } + + val fastaCat = new CommandLineFunction { + var cmds: Array[BiopetCommandLineFunction] = Array() + + @Input + var input: List[File] = Nil + + @Output + var output = fastaFile + def commandLine = cmds.mkString(" && ") + } + + if (fastaUris.length > 1 || fastaFiles.filter(_.getName.endsWith(".gz")).nonEmpty) { + fastaFiles.foreach { file => + if (file.getName.endsWith(".gz")) { + val zcat = new Zcat(this) + zcat.appending = true + zcat.input :+= file + zcat.output = fastaFile + fastaCat.cmds :+= zcat + fastaCat.input :+= file + } else { + val cat = new Cat(this) + cat.appending = true + cat.input :+= file + cat.output = fastaFile + fastaCat.cmds :+= cat + fastaCat.input :+= file + } + } + add(fastaCat) + configDeps :+= fastaCat.output + } + + val faidx = SamtoolsFaidx(this, fastaFile) + add(faidx) + configDeps :+= faidx.output + + val createDict = new CreateSequenceDictionary(this) + createDict.reference = fastaFile + createDict.output = new File(genomeDir, fastaFile.getName.stripSuffix(".fa") + ".dict") + createDict.species = 
Some(speciesName) + createDict.genomeAssembly = Some(genomeName) + createDict.uri = Some(fastaUris.mkString(",")) + add(createDict) + configDeps :+= createDict.output + + def createLinks(dir: File): File = { + val newFastaFile = new File(dir, fastaFile.getName) + val newFai = new File(dir, faidx.output.getName) + val newDict = new File(dir, createDict.output.getName) + + add(Ln(this, faidx.output, newFai)) + add(Ln(this, createDict.output, newDict)) + val lnFasta = Ln(this, fastaFile, newFastaFile) + lnFasta.deps ++= List(newFai, newDict) + add(lnFasta) + newFastaFile + } + + val annotationDir = new File(genomeDir, "annotation") + + genomeConfig.get("vep_cache_uri").foreach { vepCacheUri => + val vepDir = new File(annotationDir, "vep") + val curl = new Curl(this) + curl.url = vepCacheUri.toString + curl.output = new File(vepDir, new File(curl.url).getName) + curl.isIntermediate = true + add(curl) + + val tar = new TarExtract(this) + tar.inputTar = curl.output + tar.outputDir = vepDir + add(tar) + + val regex = """.*\/(.*)_vep_(\d*)_(.*)\.tar\.gz""".r + vepCacheUri.toString match { + case regex(species, version, assembly) if (version.forall(_.isDigit)) => { + outputConfig ++= Map("varianteffectpredictor" -> Map( + "species" -> species, + "assembly" -> assembly, + "cache_version" -> version.toInt, + "cache" -> vepDir, + "fasta" -> createLinks(vepDir))) + } + case _ => throw new IllegalArgumentException("Cache found but no version was found") + } + } + + genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri => + val cv = new CombineVariants(this) + cv.reference = fastaFile + cv.deps ::= createDict.output + def addDownload(uri: String): Unit = { + val curl = new Curl(this) + curl.url = uri + curl.output = new File(annotationDir, new File(curl.url).getName) + curl.isIntermediate = true + add(curl) + cv.inputFiles ::= curl.output + + val tabix = new Tabix(this) + tabix.input = curl.output + tabix.p = Some("vcf") + tabix.isIntermediate = true + add(tabix) + configDeps :+= tabix.outputIndex + cv.deps ::= tabix.outputIndex + } + + dbsnpUri match { + case l: Traversable[_] => l.foreach(x => addDownload(x.toString)) + case l: util.ArrayList[_] => l.foreach(x => addDownload(x.toString)) + case _ => addDownload(dbsnpUri.toString) + } + + cv.outputFile = new File(annotationDir, "dbsnp.vcf.gz") + add(cv) + } + + // Bwa index + val bwaIndex = new BwaIndex(this) + bwaIndex.reference = createLinks(new File(genomeDir, "bwa")) + add(bwaIndex) + configDeps :+= bwaIndex.jobOutputFile + outputConfig += "bwa" -> Map("reference_fasta" -> bwaIndex.reference.getAbsolutePath) + + // Gmap index + val gmapDir = new File(genomeDir, "gmap") + val gmapBuild = new GmapBuild(this) + gmapBuild.dir = gmapDir + gmapBuild.db = genomeName + gmapBuild.fastaFiles ::= createLinks(gmapDir) + add(gmapBuild) + configDeps :+= gmapBuild.jobOutputFile + outputConfig += "gsnap" -> Map("dir" -> gmapBuild.dir.getAbsolutePath, "db" -> genomeName) + outputConfig += "gmap" -> Map("dir" -> gmapBuild.dir.getAbsolutePath, "db" -> genomeName) + + val starDir = new File(genomeDir, "star") + val starIndex = new Star(this) + starIndex.outputDir = starDir + starIndex.reference = createLinks(starDir) + starIndex.runmode = "genomeGenerate" + add(starIndex) + configDeps :+= starIndex.jobOutputFile + outputConfig += "star" -> Map( + "reference_fasta" -> starIndex.reference.getAbsolutePath, + "genomeDir" -> starDir.getAbsolutePath + ) + + val bowtieIndex = new BowtieBuild(this) + bowtieIndex.reference = createLinks(new File(genomeDir, "bowtie")) + 
bowtieIndex.baseName = "reference" + add(bowtieIndex) + configDeps :+= bowtieIndex.jobOutputFile + outputConfig += "bowtie" -> Map("reference_fasta" -> bowtieIndex.reference.getAbsolutePath) + + val bowtie2Index = new Bowtie2Build(this) + bowtie2Index.reference = createLinks(new File(genomeDir, "bowtie2")) + bowtie2Index.baseName = "reference" + add(bowtie2Index) + configDeps :+= bowtie2Index.jobOutputFile + outputConfig += "bowtie2" -> Map("reference_fasta" -> bowtie2Index.reference.getAbsolutePath) + outputConfig += "tophat" -> Map( + "bowtie_index" -> bowtie2Index.reference.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta") + ) + + outputConfig + } + } + + add(new InProcessFunction { + @Input val deps: List[File] = configDeps + + def run: Unit = { + val writer = new PrintWriter(outputConfigFile) + writer.println(ConfigUtils.mapToJson(Map("references" -> outputConfig)).spaces2) + writer.close() + } + }) + } +} + +object GenerateIndexes extends PipelineCommand diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala index 409e027dc5165e27db853f50c2f9f49b90ee3970..0eab05598fc49307439534a0845a03d415b863fd 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala @@ -19,16 +19,15 @@ import java.io.File import nl.lumc.sasc.biopet.FullVersion import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.core.summary._ +import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension import nl.lumc.sasc.biopet.extensions.picard.{ MergeSamFiles, SortSam } import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView import nl.lumc.sasc.biopet.extensions.tools.{ MergeTables, WipeReads } import nl.lumc.sasc.biopet.extensions.{ HtseqCount, Ln } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.pipelines.gentrap.extensions.{ CustomVarScan, Pdflatex, RawBaseCounter } import nl.lumc.sasc.biopet.pipelines.gentrap.scripts.{ AggrBaseCount, PdfReportTemplateWriter, PlotHeatmap } -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping +import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config._ import org.broadinstitute.gatk.queue.QScript @@ -46,9 +45,7 @@ import scalaz.Scalaz._ * @author Wibowo Arindrarto <w.arindrarto@lumc.nl> */ class Gentrap(val root: Configurable) extends QScript - with MultiSampleQScript - with SummaryQScript - with Reference { qscript => + with MultisampleMappingTrait { qscript => import Gentrap.ExpMeasures._ import Gentrap.StrandProtocol._ @@ -57,6 +54,13 @@ class Gentrap(val root: Configurable) extends QScript // alternative constructor for initialization with empty configuration def this() = this(null) + override def reportClass: Option[ReportBuilderExtension] = { + val report = new GentrapReport(this) + report.outputDir = new File(outputDir, "report") + report.summaryFile = summaryFile + Some(report) + } + /** Split aligner to use */ var aligner: String = config("aligner", default = "gsnap") @@ -102,24 +106,25 @@ class Gentrap(val root: Configurable) extends QScript /** Whether to do simple variant calling on RNA or not */ var callVariants: Boolean = config("call_variants", default = false) - /** Settings for all Picard CollectRnaSeqMetrics runs */ - 
private def collectRnaSeqMetricsSettings: Map[String, String] = Map( - "strand_specificity" -> (strandProtocol match { - case NonSpecific => StrandSpecificity.NONE.toString - case Dutp => StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString - case otherwise => throw new IllegalStateException(otherwise.toString) - })) ++ (ribosomalRefFlat match { - case Some(rbs) => Map("ribosomal_intervals" -> rbs.toString) - case None => Map() - }) - /** Default pipeline config */ override def defaults = Map( + "merge_strategy" -> "preprocessmergesam", "gsnap" -> Map( "novelsplicing" -> 1, "batch" -> 4, "format" -> "sam" ), + "bammetrics" -> Map( + "transcript_refflat" -> annotationRefFlat, + "collectrnaseqmetrics" -> ((if (strandProtocol != null) Map( + "strand_specificity" -> (strandProtocol match { + case NonSpecific => StrandSpecificity.NONE.toString + case Dutp => StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString + case otherwise => throw new IllegalStateException(otherwise.toString) + }) + ) + else Map()) ++ (if (ribosomalRefFlat != null) ribosomalRefFlat.map("ribosomal_intervals" -> _.getAbsolutePath).toList else Nil)) + ), "cutadapt" -> Map("minimum_length" -> 20), // avoid conflicts when merging since the MarkDuplicate tags often cause merges to fail "picard" -> Map( @@ -127,8 +132,7 @@ class Gentrap(val root: Configurable) extends QScript ), // disable markduplicates since it may not play well with all aligners (this can still be overriden via config) "mapping" -> Map( - "skip_markduplicates" -> true, - "skip_metrics" -> true + "skip_markduplicates" -> true ) ) @@ -297,8 +301,7 @@ class Gentrap(val root: Configurable) extends QScript def summaryFile: File = new File(outputDir, "gentrap.summary.json") /** Files that will be listed in the summary file */ - def summaryFiles: Map[String, File] = Map( - "reference_fasta" -> referenceFasta(), + override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map( "annotation_refflat" -> annotationRefFlat ) ++ Map( "annotation_gtf" -> annotationGtf, @@ -312,13 +315,12 @@ class Gentrap(val root: Configurable) extends QScript def summaryStats: Map[String, Any] = Map() /** Pipeline settings shown in the summary file */ - def summarySettings: Map[String, Any] = Map( + override def summarySettings: Map[String, Any] = super.summarySettings ++ Map( "aligner" -> aligner, "expression_measures" -> expMeasures.toList.map(_.toString), "strand_protocol" -> strandProtocol.toString, "call_variants" -> callVariants, "remove_ribosomal_reads" -> removeRibosomalReads, - "reference" -> referenceSummary, "version" -> FullVersion ) @@ -340,7 +342,9 @@ class Gentrap(val root: Configurable) extends QScript } /** Steps to run before biopetScript */ - def init(): Unit = { + override def init(): Unit = { + super.init() + // TODO: validate that exons are flattened or not (depending on another option flag?) 
// validate required annotation files if (expMeasures.contains(FragmentsPerGene) && annotationGtf.isEmpty) @@ -368,13 +372,9 @@ class Gentrap(val root: Configurable) extends QScript if (annotationRefFlat.getName.nonEmpty) inputFiles :+= new InputFile(annotationRefFlat) } - /** Pipeline run for each sample */ - def biopetScript(): Unit = { - addSamplesJobs() - } - /** Pipeline run for multiple samples */ - def addMultiSampleJobs(): Unit = { + override def addMultiSampleJobs(): Unit = { + super.addMultiSampleJobs // merge expression tables mergeTableJobs.values.foreach { case maybeJob => maybeJob.foreach(add(_)) } // add heatmap jobs @@ -384,32 +384,31 @@ class Gentrap(val root: Configurable) extends QScript geneFragmentsCountJob } // TODO: use proper notation - addSummaryJobs() - add(pdfTemplateJob) - add(pdfReportJob) + //add(pdfTemplateJob) + //add(pdfReportJob) } /** Returns a [[Sample]] object */ - def makeSample(sampleId: String): Sample = new Sample(sampleId) + override def makeSample(sampleId: String): Sample = new Sample(sampleId) /** * Gentrap sample * * @param sampleId Unique identifier of the sample */ - class Sample(sampleId: String) extends AbstractSample(sampleId) with CufflinksProducer { + class Sample(sampleId: String) extends super.Sample(sampleId) with CufflinksProducer { /** Shortcut to qscript object */ protected def pipeline: Gentrap = qscript /** Summary stats of the sample */ - def summaryStats: Map[String, Any] = Map( + override def summaryStats: Map[String, Any] = super.summaryStats ++ Map( "all_paired" -> allPaired, "all_single" -> allSingle ) /** Summary files of the sample */ - def summaryFiles: Map[String, File] = Map( + override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map( "alignment" -> alnFile ) ++ Map( "gene_fragments_count" -> geneFragmentsCount, @@ -425,13 +424,10 @@ class Gentrap(val root: Configurable) extends QScript "variant_calls" -> variantCalls ).collect { case (key, Some(value)) => key -> value } - /** Per-sample alignment file, pre rRNA cleanup (if chosen) */ - lazy val alnFileDirty: File = sampleAlnJobSet.alnJob.output - /** Per-sample alignment file, post rRNA cleanup (if chosen) */ lazy val alnFile: File = wipeJob match { case Some(j) => j.outputBam - case None => alnFileDirty + case None => preProcessBam.get } /** Read count per gene file */ @@ -698,24 +694,12 @@ class Gentrap(val root: Configurable) extends QScript job } - /** General metrics job, only when library > 1 */ - private lazy val bamMetricsModule: Option[BamMetrics] = (libraries.size > 1) - .option { - val mod = new BamMetrics(qscript) - mod.inputBam = alnFile - mod.outputDir = new File(sampleDir, "metrics") - mod.sampleId = Option(sampleId) - mod.transcriptRefFlatFile = Option(annotationRefFlat) - mod.rnaMetricsSettings = collectRnaSeqMetricsSettings - mod - } - /** Job for removing ribosomal reads */ private def wipeJob: Option[WipeReads] = removeRibosomalReads .option { //require(ribosomalRefFlat.isDefined) val job = new WipeReads(qscript) - job.inputBam = alnFileDirty + job.inputBam = bamFile.get ribosomalRefFlat.foreach(job.intervalFile = _) job.outputBam = createFile(".cleaned.bam") job.discardedBam = createFile(".rrna.bam") @@ -752,33 +736,19 @@ class Gentrap(val root: Configurable) extends QScript } } - /** Job for combining all library BAMs */ - private def sampleAlnJobSet: CombineFileJobSet = - makeCombineJob(libraries.values.map(_.alnFile).toList, createFile(".bam")) - /** Whether all libraries are paired or not */ - def allPaired: Boolean = 
libraries.values.forall(_.paired) + def allPaired: Boolean = libraries.values.forall(_.mapping.forall(_.input_R2.isDefined)) /** Whether all libraries are single or not */ - def allSingle: Boolean = libraries.values.forall(!_.paired) + def allSingle: Boolean = libraries.values.forall(_.mapping.forall(_.input_R2.isEmpty)) // TODO: add warnings or other messages for config values that are hard-coded by the pipeline /** Adds all jobs for the sample */ - def addJobs(): Unit = { + override def addJobs(): Unit = { + super.addJobs() // TODO: this is our requirement since it's easier to calculate base counts when all libraries are either paired or single require(allPaired || allSingle, s"Sample $sampleId contains only single-end or paired-end libraries") - // add per-library jobs - addPerLibJobs() // merge or symlink per-library alignments - sampleAlnJobSet.addAll() - bamMetricsModule match { - case Some(m) => - m.init() - m.biopetScript() - addAll(m.functions) - addSummaryQScript(m) - case None => ; - } // add bigwig output, also per-strand when possible addAll(Bam2Wig(qscript, alnFile).functions) alnFilePlusStrand.collect { case f => addAll(Bam2Wig(qscript, f).functions) } @@ -802,75 +772,6 @@ class Gentrap(val root: Configurable) extends QScript // add variant calling job if requested varCallJob.foreach(add(_)) } - - /** Returns a [[Library]] object */ - def makeLibrary(libId: String): Library = new Library(libId) - - /** - * Gentrap library - * - * @param libId Unique identifier of the library - */ - class Library(libId: String) extends AbstractLibrary(libId) { - - /** Summary stats of the library */ - def summaryStats: Map[String, Any] = Map() - - /** Summary files of the library */ - def summaryFiles: Map[String, File] = Map( - "alignment" -> mappingJob.outputFiles("finalBamFile") - ) - - /** Convenience method to check whether the library is paired or not */ - def paired: Boolean = config.contains("R2") - - /** Alignment results of this library ~ can only be accessed after addJobs is run! 
*/ - def alnFile: File = mappingJob.outputFiles("finalBamFile") - - /** Wiggle track job */ - private lazy val bam2wigModule: Bam2Wig = Bam2Wig(qscript, alnFile) - - /** Per-library mapping job */ - def mappingJob: Mapping = { - val job = new Mapping(qscript) - job.sampleId = Option(sampleId) - job.libId = Option(libId) - job.outputDir = libDir - job.input_R1 = config("R1") - job.input_R2 = config("R2") - job.init() - job.biopetScript() - job - } - - /** Library metrics job, since we don't have access to the underlying metrics */ - private lazy val bamMetricsJob: BamMetrics = { - val mod = new BamMetrics(qscript) - mod.inputBam = alnFile - mod.outputDir = new File(libDir, "metrics") - mod.sampleId = Option(sampleId) - mod.libId = Option(libId) - mod.rnaMetricsSettings = collectRnaSeqMetricsSettings - mod.transcriptRefFlatFile = Option(annotationRefFlat) - mod - } - - /** Adds all jobs for the library */ - def addJobs(): Unit = { - // create per-library alignment file - addAll(mappingJob.functions) - // Input file checking - inputFiles :::= mappingJob.inputFiles - // add bigwig track - addAll(bam2wigModule.functions) - qscript.addSummaryQScript(mappingJob) - bamMetricsJob.init() - bamMetricsJob.biopetScript() - addAll(bamMetricsJob.functions) - qscript.addSummaryQScript(bamMetricsJob) - } - - } } } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala index 55f50882862db3bdc787fffdbeb6be35cf24d009..16b8a6fff7076ae9aa9d09a59c4d8fb52c4b75f9 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapReport.scala @@ -15,10 +15,9 @@ */ package nl.lumc.sasc.biopet.pipelines.gentrap +import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection } +import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingReportTrait import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection, ReportPage, MultisampleReportBuilder } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport -import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport /** * Class to generate report for [[Gentrap]] @@ -29,69 +28,12 @@ class GentrapReport(val root: Configurable) extends ReportBuilderExtension { def builder = GentrapReport } -object GentrapReport extends MultisampleReportBuilder { +object GentrapReport extends MultisampleMappingReportTrait { - /** Root page for the carp report */ - def indexPage = { - //Source.fromInputStream(getClass.getResourceAsStream("/nl/lumc/sasc/biopet/pipelines/carp/carpFont.ssp")).foreach(print(_)) - ReportPage( - List("Samples" -> generateSamplesPage(pageArgs)) ++ - Map("Files" -> filesPage, - "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp" - )), Map()) - ), - List( - "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gentrap/gentrapFront.ssp"), - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false) - ), - "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "QC reads" -> 
ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)) - ), - pageArgs - ) - } - - /** Files page, can be used general or at sample level */ - def filesPage: ReportPage = ReportPage(List(), List( - "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), - "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), - "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)) //, - //"Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", - // Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam")) - ), Map()) - - /** Single sample page */ - def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { - ReportPage(List( - "Libraries" -> generateLibraryPage(args), - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None), - "Files" -> filesPage - ), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", - if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()), - "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true)), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) - } - - /** Library page */ - def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { - ReportPage(List( - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)), - "QC" -> FlexiprepReport.flexiprepPage - ), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) - } + override def frontSection = ReportSection("/nl/lumc/sasc/biopet/pipelines/gentrap/gentrapFront.ssp") /** Name of the report */ def reportName = "Gentrap Report" + + override def pipelineName = "gentrap" } \ No newline at end of file diff --git a/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala b/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala index 570a6e378ad07649cc0d0fea1f0548b424f0b04b..321fea8ba5b815d4dd71bf6f7cbdb85e293a9a8f 100644 --- a/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala +++ b/public/gentrap/src/test/scala/nl/lumc/sasc/biopet/pipelines/gentrap/GentrapTest.scala @@ -20,6 +20,7 @@ import java.io.{ File, FileOutputStream } import com.google.common.io.Files import nl.lumc.sasc.biopet.utils.config.Config import nl.lumc.sasc.biopet.extensions._ +import nl.lumc.sasc.biopet.extensions.gmap.Gsnap import nl.lumc.sasc.biopet.pipelines.gentrap.scripts.AggrBaseCount import nl.lumc.sasc.biopet.utils.ConfigUtils import org.apache.commons.io.FileUtils diff --git a/public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp 
b/public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp new file mode 100644 index 0000000000000000000000000000000000000000..a7027c4681c22f8119456fd445c5ddf83bb2366b --- /dev/null +++ b/public/mapping/src/main/resources/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp @@ -0,0 +1,36 @@ +#import(nl.lumc.sasc.biopet.utils.summary.Summary) +#import(nl.lumc.sasc.biopet.core.report.ReportPage) +<%@ var summary: Summary %> +<%@ var rootPath: String %> + +<table class="table"> +<tbody> + <tr><th>Pipeline</th><td>Shiva</td></tr> + <tr><th>Version</th><td>${summary.getValue("meta", "pipeline_version")}</td></tr> + <tr><th>Last commit hash</th><td>${summary.getValue("meta", "last_commit_hash")}</td></tr> + <tr><th>Output directory</th><td>${summary.getValue("meta", "output_dir")}</td></tr> + <tr><th>Reference</th><td>${summary.getValue("shiva", "settings", "reference", "species")} - ${summary.getValue("shiva", "settings", "reference", "name")}</td></tr> + <tr><th>Number of samples</th><td>${summary.samples.size}</td></tr> +</tbody> +</table> +<br/> +<div class="row"> + <div class="col-md-1"></div> + <div class="col-md-6"> + <p> + In this web document you can find your <em>Shiva</em> pipeline report. + Different categories of data can be found in the left-side menu. + Statistics per sample and library can be accessed through the top-level menu. + Some statistics for target regions can be found in the regions tab. + Furthermore, you can view all versions of software tools used by selecting <em>Versions</em> from the top menu. + </p> + + <p> + <small>Brought to you by <a href="https://sasc.lumc.nl" target="_blank"><abbr + title="Sequence Analysis Support Core">SASC</abbr></a> and <a + href="https://www.lumc.nl/org/klinische-genetica/" target="_blank"><abbr title="Clinical Genetics LUMC">KG</abbr></a>, + LUMC.
+ </small> + </p> + </div> +</div> \ No newline at end of file diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index c588c4413938aeb56ae36af36c903f2263445239..716b99d0924ee64aa2b9ae13f6f631941080b70c 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -20,10 +20,12 @@ import java.util.Date import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie2, Bowtie } import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse } +import nl.lumc.sasc.biopet.extensions.gmap.Gsnap import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, MergeSamFiles, ReorderSam, SortSam } import nl.lumc.sasc.biopet.extensions.tools.FastqSplitter -import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat, _ } +import nl.lumc.sasc.biopet.extensions._ import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep @@ -105,7 +107,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S ) /** File to add to the summary */ - def summaryFiles: Map[String, File] = Map("output_bamfile" -> finalBamFile, "input_R1" -> input_R1, + def summaryFiles: Map[String, File] = Map("output_bam" -> finalBamFile, "input_R1" -> input_R1, "reference" -> referenceFasta()) ++ (if (input_R2.isDefined) Map("input_R2" -> input_R2.get) else Map()) @@ -219,6 +221,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S case "bwa-mem" => addBwaMem(R1, R2, outputBam) case "bwa-aln" => addBwaAln(R1, R2, outputBam) case "bowtie" => addBowtie(R1, R2, outputBam) + case "bowtie2" => addBowtie2(R1, R2, outputBam) case "gsnap" => addGsnap(R1, R2, outputBam) // TODO: make TopHat here accept multiple input files case "tophat" => addTophat(R1, R2, outputBam) @@ -243,7 +246,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S add(md) addSummarizable(md, "mark_duplicates") } else if (skipMarkduplicates && chunking) { - val mergeSamFile = MergeSamFiles(this, bamFiles, outputDir) + val mergeSamFile = MergeSamFiles(this, bamFiles, new File(outputDir, outputName + ".merge.bam")) add(mergeSamFile) bamFile = mergeSamFile.output } @@ -377,7 +380,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S // merge with mapped file val mergeSamFile = MergeSamFiles(this, List(tophat.outputAcceptedHits, sorter.output), - tophat.output_dir, "coordinate") + new File(tophat.output_dir, "fixed_merged.bam"), sortOrder = "coordinate") mergeSamFile.createIndex = true mergeSamFile.isIntermediate = true add(mergeSamFile) @@ -392,6 +395,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S add(ar._1) ar._2 } + /** Adds stampy jobs */ def addStampy(R1: File, R2: Option[File], output: File): File = { @@ -437,6 +441,25 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S ar._2 } + /** Add bowtie2 jobs **/ + def addBowtie2(R1: File, R2: Option[File], output: File): File = { + val bowtie2 = new Bowtie2(this) + bowtie2.rg_id = Some(readgroupId) + bowtie2.rg +:= ("LB:" + libId.get) + bowtie2.rg +:= ("PL:" + platform) + 
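// Hedged usage sketch, not part of this patch: with the "bowtie2" case added to the
// aligner match above, a mapping config would select this aligner roughly as follows,
// mirroring the "bowtie2" executable entry added to the test config further down
// (the val name and the /usr/bin path are placeholders).
val exampleBowtie2Config = Map(
  "aligner" -> "bowtie2",
  "bowtie2" -> Map("exe" -> "/usr/bin/bowtie2"))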
bowtie2.rg +:= ("PU:" + platformUnit) + bowtie2.rg +:= ("SM:" + sampleId.get) + bowtie2.R1 = R1 + bowtie2.R2 = R2 + val sortSam = new SortSam(this) + sortSam.output = output + val pipe = bowtie2 | sortSam + pipe.isIntermediate = chunking || !skipMarkduplicates + pipe.threadsCorrection = -1 + add(pipe) + output + } + /** Adds Star jobs */ def addStar(R1: File, R2: Option[File], output: File): File = { val zcatR1 = extractIfNeeded(R1, output.getParentFile) diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala new file mode 100644 index 0000000000000000000000000000000000000000..2ace6f962b245acca0e65212ea6ee11bb478ebf0 --- /dev/null +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala @@ -0,0 +1,97 @@ +package nl.lumc.sasc.biopet.pipelines.mapping + +import nl.lumc.sasc.biopet.core.report.{ ReportBuilderExtension, ReportSection, ReportPage, MultisampleReportBuilder } +import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport +import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Created by pjvanthof on 11/01/16. + */ +class MultisampleMappingReport(val root: Configurable) extends ReportBuilderExtension { + def builder = MultisampleMappingReport +} + +object MultisampleMappingReport extends MultisampleMappingReportTrait { + /** Name of the report */ + def reportName = "Mapping Report" +} + +trait MultisampleMappingReportTrait extends MultisampleReportBuilder { + /** Front section for the report */ + def frontSection: ReportSection = ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/multisampleMappingFront.ssp") + + def pipelineName = "multisamplemapping" + + /** Root page for the carp report */ + def indexPage = { + + val wgsExecuted = summary.getSampleValues("bammetrics", "stats", "wgs").values.exists(_.isDefined) + val rnaExecuted = summary.getSampleValues("bammetrics", "stats", "rna").values.exists(_.isDefined) + + ReportPage( + List("Samples" -> generateSamplesPage(pageArgs)) ++ + Map("Reference" -> ReportPage(List(), List( + "Reference" -> ReportSection("/nl/lumc/sasc/biopet/core/report/reference.ssp", Map("pipeline" -> pipelineName)) + ), Map()), + "Files" -> filesPage, + "Versions" -> ReportPage(List(), List("Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp" + )), Map()) + ), + List( + "Report" -> frontSection, + "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false) + ), + "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false))) ++ + (if (wgsExecuted) List("Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false))) + else Nil) ++ + (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp", + Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false))) + else Nil) ++ + List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", + Map("showPlot" -> true, "showTable" -> false)), + "QC bases" -> 
ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", + Map("showPlot" -> true, "showTable" -> false)) + ), + pageArgs + ) + } + + /** Files page, can be used general or at sample level */ + def filesPage: ReportPage = ReportPage(List(), List( + "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), + "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), + "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)), + "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", + Map("pipelineName" -> pipelineName, "fileTag" -> "output_bam_preprocess"))), Map()) + + /** Single sample page */ + def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { + ReportPage(List( + "Libraries" -> generateLibraryPage(args), + "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None), + "Files" -> filesPage + ), List( + "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", + if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()), + "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true))) ++ + List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") + ), args) + } + + /** Library page */ + def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { + ReportPage(List( + "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)), + "QC" -> FlexiprepReport.flexiprepPage + ), List( + "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), + "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") + ), args) + } +} \ No newline at end of file diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala new file mode 100644 index 0000000000000000000000000000000000000000..19348061dbe29285437ab08c48cd43786d03aca6 --- /dev/null +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTrait.scala @@ -0,0 +1,226 @@ +package nl.lumc.sasc.biopet.pipelines.mapping + +import java.io.File + +import htsjdk.samtools.SamReaderFactory +import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension +import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript } +import nl.lumc.sasc.biopet.extensions.Ln +import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles, AddOrReplaceReadGroups, SamToFastq } +import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics +import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.QScript + +import MultisampleMapping.MergeStrategy + +import scala.collection.JavaConversions._ + +/** + * Created by pjvanthof on 18/12/15. 
+ */ +trait MultisampleMappingTrait extends MultiSampleQScript + with Reference { qscript: QScript => + + def mergeStrategy: MergeStrategy.Value = { + val value: String = config("merge_strategy", default = "preprocessmarkduplicates") + MergeStrategy.values.find(_.toString.toLowerCase == value) match { + case Some(v) => v + case _ => throw new IllegalArgumentException(s"merge_strategy '$value' does not exist") + } + } + + def init(): Unit = { + } + + def biopetScript(): Unit = { + addSamplesJobs() + addSummaryJobs() + } + + override def reportClass: Option[ReportBuilderExtension] = { + val report = new MultisampleMappingReport(this) + report.outputDir = new File(outputDir, "report") + report.summaryFile = summaryFile + Some(report) + } + + def addMultiSampleJobs(): Unit = { + // this code will be executed after all code of all samples is executed + } + + def summaryFiles: Map[String, File] = Map("referenceFasta" -> referenceFasta()) + + def summarySettings: Map[String, Any] = Map( + "reference" -> referenceSummary, + "merge_strategy" -> mergeStrategy.toString) + + def makeSample(id: String) = new Sample(id) + class Sample(sampleId: String) extends AbstractSample(sampleId) { + + def makeLibrary(id: String) = new Library(id) + class Library(libId: String) extends AbstractLibrary(libId) { + def summaryFiles: Map[String, File] = (inputR1.map("input_R1" -> _) :: inputR2.map("input_R2" -> _) :: + inputBam.map("input_bam" -> _) :: bamFile.map("output_bam" -> _) :: + preProcessBam.map("output_bam_preprocess" -> _) :: Nil).flatten.toMap + + def summaryStats: Map[String, Any] = Map() + + lazy val inputR1: Option[File] = MultisampleMapping.fileMustBeAbsolute(config("R1")) + lazy val inputR2: Option[File] = MultisampleMapping.fileMustBeAbsolute(config("R2")) + lazy val inputBam: Option[File] = MultisampleMapping.fileMustBeAbsolute(if (inputR1.isEmpty) config("bam") else None) + lazy val bamToFastq: Boolean = config("bam_to_fastq", default = false) + lazy val correctReadgroups: Boolean = config("correct_readgroups", default = false) + + lazy val mapping = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) { + val m = new Mapping(qscript) + m.sampleId = Some(sampleId) + m.libId = Some(libId) + m.outputDir = libDir + Some(m) + } else None + + def bamFile = mapping match { + case Some(m) => Some(m.finalBamFile) + case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam")) + case _ => None + } + + def preProcessBam = bamFile + + def addJobs(): Unit = { + inputR1.foreach(inputFiles :+= new InputFile(_, config("R1_md5"))) + inputR2.foreach(inputFiles :+= new InputFile(_, config("R2_md5"))) + inputBam.foreach(inputFiles :+= new InputFile(_, config("bam_md5"))) + + if (inputR1.isDefined) { + mapping.foreach { m => + m.input_R1 = inputR1.get + m.input_R2 = inputR2 + add(m) + } + } else if (inputBam.isDefined) { + if (bamToFastq) { + val samToFastq = SamToFastq(qscript, inputBam.get, + new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"), + new File(libDir, sampleId + "-" + libId + ".R2.fq.gz")) + samToFastq.isIntermediate = true + qscript.add(samToFastq) + mapping.foreach(m => { + m.input_R1 = samToFastq.fastqR1 + m.input_R2 = Some(samToFastq.fastqR2) + add(m) + }) + } else { + val inputSam = SamReaderFactory.makeDefault.open(inputBam.get) + val readGroups = inputSam.getFileHeader.getReadGroups + + val readGroupOke = readGroups.forall(readGroup => { + if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") + if (readGroup.getLibrary != 
libId) logger.warn("Library ID readgroup in bam file is not the same") + readGroup.getSample == sampleId && readGroup.getLibrary == libId + }) + inputSam.close() + + if (!readGroupOke) { + if (correctReadgroups) { + logger.info("Correcting readgroups, file:" + inputBam.get) + val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get) + aorrg.RGID = s"$sampleId-$libId" + aorrg.RGLB = libId + aorrg.RGSM = sampleId + aorrg.RGPL = "unknown" + aorrg.RGPU = "na" + aorrg.isIntermediate = true + qscript.add(aorrg) + } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + + "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") + } else { + val oldBamFile: File = inputBam.get + val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") + val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai") + val baiLn = Ln(qscript, oldIndex, newIndex) + add(baiLn) + + val bamLn = Ln(qscript, oldBamFile, bamFile.get) + bamLn.deps :+= baiLn.output + add(bamLn) + } + + val bamMetrics = new BamMetrics(qscript) + bamMetrics.sampleId = Some(sampleId) + bamMetrics.libId = Some(libId) + bamMetrics.inputBam = bamFile.get + bamMetrics.outputDir = new File(libDir, "metrics") + add(bamMetrics) + } + } else logger.warn(s"Sample '$sampleId' does not have any input files") + } + } + + def summaryFiles: Map[String, File] = (bamFile.map("output_bam" -> _) :: + preProcessBam.map("output_bam_preprocess" -> _) :: Nil).flatten.toMap + + def summaryStats: Map[String, Any] = Map() + + def bamFile = if (libraries.flatMap(_._2.bamFile).nonEmpty && + mergeStrategy != MultisampleMapping.MergeStrategy.None) + Some(new File(sampleDir, s"$sampleId.bam")) + else None + + def preProcessBam = bamFile + + def keepMergedFiles: Boolean = config("keep_merged_files", default = true) + + def addJobs(): Unit = { + addPerLibJobs() // This add jobs for each library + + mergeStrategy match { + case MergeStrategy.None => + case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 => + add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*) + case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 => + add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*) + case MergeStrategy.MergeSam => + add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = keepMergedFiles)) + case MergeStrategy.PreProcessMergeSam => + add(MergeSamFiles(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = keepMergedFiles)) + case MergeStrategy.MarkDuplicates => + add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = keepMergedFiles)) + case MergeStrategy.PreProcessMarkDuplicates => + add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = keepMergedFiles)) + case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?") + } + + if (mergeStrategy != MergeStrategy.None && libraries.flatMap(_._2.bamFile).nonEmpty) { + val bamMetrics = new BamMetrics(qscript) + bamMetrics.sampleId = Some(sampleId) + bamMetrics.inputBam = preProcessBam.get + bamMetrics.outputDir = new File(sampleDir, "metrics") + add(bamMetrics) + 
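// Illustrative config sketch, not part of this patch: merge_strategy (read above) is
// compared against the lower-cased MergeStrategy names defined further down, so valid
// values are "none", "mergesam", "markduplicates", "preprocessmergesam" and
// "preprocessmarkduplicates"; the library-level keys below are the ones read above
// (the val name and the chosen values are placeholders).
val exampleMultisampleMappingConfig = Map(
  "merge_strategy" -> "mergesam",
  "bam_to_fastq" -> false,     // convert an input bam back to fastq before mapping
  "correct_readgroups" -> true // rewrite mismatching sample/library readgroups instead of failing
)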
} + } + } +} + +class MultisampleMapping(val root: Configurable) extends QScript with MultisampleMappingTrait { + def this() = this(null) + + def summaryFile: File = new File(outputDir, "MultisamplePipeline.summary.json") +} + +object MultisampleMapping extends PipelineCommand { + + object MergeStrategy extends Enumeration { + val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value + } + + def fileMustBeAbsolute(file: Option[File]): Option[File] = { + if (file.forall(_.isAbsolute)) file + else { + Logging.addError(s"$file should be a absolute file path") + file.map(_.getAbsoluteFile) + } + } + +} diff --git a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala index 377c771cee17b62922381cd09d9c4ebd0aa15aec..7af4c60d5a2dc07b31b60864c5557287fc949fca 100644 --- a/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala +++ b/public/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala @@ -25,7 +25,7 @@ import org.apache.commons.io.FileUtils import org.broadinstitute.gatk.queue.QSettings import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite -import org.testng.annotations.{ AfterClass, DataProvider, Test } +import org.testng.annotations.{ BeforeClass, AfterClass, DataProvider, Test } /** * Test class for [[Mapping]] @@ -64,20 +64,20 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with skipMarkDuplicate: Boolean, skipFlexiprep: Boolean, zipped: Boolean) = { - val map = ConfigUtils.mergeMaps(Map("output_dir" -> MappingTest.outputDir, + val map = ConfigUtils.mergeMaps(Map("output_dir" -> outputDir, "aligner" -> aligner, "number_chunks" -> chunks, "skip_markduplicates" -> skipMarkDuplicate, "skip_flexiprep" -> skipFlexiprep - ), Map(MappingTest.executables.toSeq: _*)) + ), Map(executables.toSeq: _*)) val mapping: Mapping = initPipeline(map) if (zipped) { - mapping.input_R1 = MappingTest.r1Zipped - if (paired) mapping.input_R2 = Some(MappingTest.r2Zipped) + mapping.input_R1 = r1Zipped + if (paired) mapping.input_R2 = Some(r2Zipped) } else { - mapping.input_R1 = MappingTest.r1 - if (paired) mapping.input_R2 = Some(MappingTest.r2) + mapping.input_R1 = r1 + if (paired) mapping.input_R2 = Some(r2) } mapping.sampleId = Some("1") mapping.libId = Some("1") @@ -87,34 +87,28 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2) } - // remove temporary run directory all tests in the class have been run - @AfterClass def removeTempOutputDir() = { - FileUtils.deleteDirectory(MappingTest.outputDir) - } -} - -class MappingBwaMemTest extends AbstractTestMapping("bwa-mem") -class MappingBwaAlnTest extends AbstractTestMapping("bwa-aln") -class MappingStarTest extends AbstractTestMapping("star") -class MappingStar2PassTest extends AbstractTestMapping("star-2pass") -class MappingBowtieTest extends AbstractTestMapping("bowtie") -class MappingStampyTest extends AbstractTestMapping("stampy") -class MappingGsnapTest extends AbstractTestMapping("gsnap") -class MappingTophatTest extends AbstractTestMapping("tophat") - -object MappingTest { - val outputDir = Files.createTempDir() new File(outputDir, "input").mkdirs() val r1 = new File(outputDir, "input" + File.separator + "R1.fq") - Files.touch(r1) val r2 = new File(outputDir, "input" + 
File.separator + "R2.fq") - Files.touch(r2) val r1Zipped = new File(outputDir, "input" + File.separator + "R1.fq.gz") - Files.touch(r1Zipped) val r2Zipped = new File(outputDir, "input" + File.separator + "R2.fq.gz") - Files.touch(r2Zipped) + + @BeforeClass + def createTempFiles: Unit = { + Files.touch(r1) + Files.touch(r2) + Files.touch(r1Zipped) + Files.touch(r2Zipped) + + copyFile("ref.fa") + copyFile("ref.dict") + copyFile("ref.fa.fai") + copyFile("ref.1.bt2") + copyFile("ref.1.ebwt") + + } private def copyFile(name: String): Unit = { val is = getClass.getResourceAsStream("/" + name) @@ -123,12 +117,6 @@ object MappingTest { os.close() } - copyFile("ref.fa") - copyFile("ref.dict") - copyFile("ref.fa.fai") - copyFile("ref.1.bt2") - copyFile("ref.1.ebwt") - val executables = Map( "reference_fasta" -> (outputDir + File.separator + "ref.fa"), "db" -> "test", @@ -142,8 +130,24 @@ object MappingTest { "bwa" -> Map("exe" -> "test"), "star" -> Map("exe" -> "test"), "bowtie" -> Map("exe" -> "test"), + "bowtie2" -> Map("exe" -> "test"), "stampy" -> Map("exe" -> "test", "genome" -> "test", "hash" -> "test"), "samtools" -> Map("exe" -> "test"), "md5sum" -> Map("exe" -> "test") ) -} \ No newline at end of file + + // remove temporary run directory all tests in the class have been run + @AfterClass def removeTempOutputDir() = { + FileUtils.deleteDirectory(outputDir) + } +} + +class MappingBwaMemTest extends AbstractTestMapping("bwa-mem") +class MappingBwaAlnTest extends AbstractTestMapping("bwa-aln") +class MappingStarTest extends AbstractTestMapping("star") +class MappingStar2PassTest extends AbstractTestMapping("star-2pass") +class MappingBowtieTest extends AbstractTestMapping("bowtie") +class MappingBowtie2Test extends AbstractTestMapping("bowtie2") +class MappingStampyTest extends AbstractTestMapping("stampy") +class MappingGsnapTest extends AbstractTestMapping("gsnap") +class MappingTophatTest extends AbstractTestMapping("tophat") diff --git a/public/pom.xml b/public/pom.xml index 4a2eb7dbb7237603eb95855d39973114d8fe3c3e..28cd0dc788e7b704d8930f94cd9b6a02759d57c6 100644 --- a/public/pom.xml +++ b/public/pom.xml @@ -40,6 +40,7 @@ <module>toucan</module> <module>shiva</module> <module>basty</module> + <module>generate-indexes</module> <module>biopet-core</module> <module>biopet-utils</module> <module>biopet-tools</module> @@ -52,6 +53,7 @@ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <scoverage.plugin.version>1.1.1</scoverage.plugin.version> <scalaVersion>2.10.4</scalaVersion> + <scoverage.aggregate>true</scoverage.aggregate> </properties> <build> @@ -248,6 +250,7 @@ <scalaVersion>${scalaVersion}</scalaVersion> <aggregate>true</aggregate> <highlighting>true</highlighting> + <aggregate>true</aggregate> <!-- other parameters --> </configuration> </plugin> diff --git a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala index cbe6fd6eb56e9d7eab647cad238456717d10a6b8..8d28a6ea13e1e09ce16afbec4faccc29631945c8 100644 --- a/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala +++ b/public/sage/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala @@ -125,7 +125,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { val bamFile: File = if (libraryBamfiles.size == 1) libraryBamfiles.head else if (libraryBamfiles.size > 1) { - val mergeSamFiles = MergeSamFiles(qscript, libraryBamfiles, sampleDir) + val mergeSamFiles = 
MergeSamFiles(qscript, libraryBamfiles, new File(sampleDir, s"$sampleId.bam")) qscript.add(mergeSamFiles) mergeSamFiles.output } else null diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala index 5c88189833b1b6bd169c3da475c6c2370957abf9..decc33b463fb35a03e1d6db575ae9d81cbe69fb1 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala @@ -17,12 +17,11 @@ package nl.lumc.sasc.biopet.pipelines.shiva import java.io.{ File, PrintWriter } -import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.report._ -import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue } +import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingReportTrait +import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.rscript.StackedBarPlot -import nl.lumc.sasc.biopet.pipelines.bammetrics.BammetricsReport -import nl.lumc.sasc.biopet.pipelines.flexiprep.FlexiprepReport +import nl.lumc.sasc.biopet.utils.summary.{ Summary, SummaryValue } /** * With this extension the report is executed within a pipeline @@ -34,52 +33,32 @@ class ShivaReport(val root: Configurable) extends ReportBuilderExtension { } /** Object for report generation for Shiva pipeline */ -object ShivaReport extends MultisampleReportBuilder { +object ShivaReport extends MultisampleMappingReportTrait { def variantcallingExecuted = summary.getValue("shiva", "settings", "multisample_variantcalling") match { case Some(true) => true case _ => false } + override def frontSection = ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp") + + override def pipelineName = "shiva" + override def extFiles = super.extFiles ++ List("js/gears.js") .map(x => ExtFile("/nl/lumc/sasc/biopet/pipelines/gears/report/ext/" + x, x)) /** Root page for the shiva report */ - def indexPage = { + override def indexPage = { + val variantcallingSection = (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", + Map("showPlot" -> true, "showTable" -> false))) + else Nil) + val regions = regionsPage - ReportPage( - List("Samples" -> generateSamplesPage(pageArgs)) ++ - (if (regions.isDefined) Map(regions.get) else Map()) ++ - Map("Reference" -> ReportPage(List(), List( - "Reference" -> ReportSection("/nl/lumc/sasc/biopet/core/report/reference.ssp", Map("pipeline" -> "shiva")) - ), Map()), - "Files" -> filesPage, - "Versions" -> ReportPage(List(), List( - "Executables" -> ReportSection("/nl/lumc/sasc/biopet/core/report/executables.ssp") - ), Map()) - ), - List( - "Report" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/shivaFront.ssp")) ++ - (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", - Map("showPlot" -> true, "showTable" -> false))) - else Nil) ++ - List("Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false) - ), - "Insert Size" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp", - Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)), - "Whole genome coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp", - Map("sampleLevel" 
-> true, "showPlot" -> true, "showTable" -> false)), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", - Map("showPlot" -> true, "showTable" -> false)) - ), - pageArgs - ) + val oldPage = super.indexPage + + oldPage.copy(sections = variantcallingSection ++ oldPage.sections, subPages = oldPage.subPages ++ regionsPage) } - //TODO: Add variants per target /** Generate a page with all target coverage stats */ def regionsPage: Option[(String, ReportPage)] = { val roi = summary.getValue("shiva", "settings", "regions_of_interest") @@ -121,49 +100,19 @@ object ShivaReport extends MultisampleReportBuilder { } /** Files page, can be used general or at sample level */ - def filesPage: ReportPage = ReportPage(List(), List( - "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), - "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), - "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)), - "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", - Map("pipelineName" -> "shiva", "fileTag" -> "preProcessBam"))) ++ - (if (variantcallingExecuted) List("VCF files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", + override def filesPage: ReportPage = { + val vcfFilesSection = if (variantcallingExecuted) List("VCF files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/outputVcfFiles.ssp", Map("sampleId" -> None))) - else Nil), Map()) - - /** Single sample page */ - def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { - ReportPage(List( - "Libraries" -> generateLibraryPage(args), - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None), - "Files" -> filesPage - ), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", - if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()), - "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true))) ++ - (if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp")) else Nil) ++ - List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) + else Nil + val oldPage = super.filesPage + oldPage.copy(sections = oldPage.sections ++ vcfFilesSection) } - /** Library page */ - def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { - val flexiprepExecuted = summary.getLibraryValue(sampleId, libId, "flexiprep").isDefined - val krakenExecuted = summary.getValue(Some(sampleId), Some(libId), "gears", "stats", "krakenreport").isDefined - - ReportPage( - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)) :: - (if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil - ) ::: (if (krakenExecuted) List("Gears - Metagenomics" -> ReportPage(List(), List( - "Sunburst analysis" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/gears/gearsSunburst.ssp" - )), Map())) 
- else Nil), "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp") :: - (if (flexiprepExecuted) List( - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ) - else Nil), args) + /** Single sample page */ + override def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { + val variantcallingSection = if (variantcallingExecuted) List("Variantcalling" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp")) else Nil + val oldPage = super.samplePage(sampleId, args) + oldPage.copy(sections = variantcallingSection ++ oldPage.sections) } /** Name of the report */ diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index ca2706f710c23ae1625d2908bdc914a2a687afb2..f0fe2c1291d647815f8d336902fbd944f6312f3f 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -15,37 +15,21 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import htsjdk.samtools.SamReaderFactory -import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference } -import nl.lumc.sasc.biopet.extensions.Ln -import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq } -import nl.lumc.sasc.biopet.pipelines.bammetrics.{ TargetRegions, BamMetrics } -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping +import nl.lumc.sasc.biopet.core.Reference +import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension +import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions +import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait import nl.lumc.sasc.biopet.pipelines.toucan.Toucan -import nl.lumc.sasc.biopet.utils.Logging import org.broadinstitute.gatk.queue.QScript -import scala.collection.JavaConversions._ - /** * This is a trait for the Shiva pipeline * * Created by pjvan_thof on 2/26/15. 
*/ -trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { qscript: QScript => - - /** Executed before running the script */ - def init(): Unit = { - } +trait ShivaTrait extends MultisampleMappingTrait with Reference with TargetRegions { qscript: QScript => - /** Method to add jobs */ - def biopetScript(): Unit = { - addSamplesJobs() - - addSummaryJobs() - } - - override def reportClass = { + override def reportClass: Option[ReportBuilderExtension] = { val shiva = new ShivaReport(this) shiva.outputDir = new File(outputDir, "report") shiva.summaryFile = summaryFile @@ -53,10 +37,10 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { } /** Method to make the variantcalling submodule of shiva */ - def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = { + def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait with QScript = { if (multisample) new ShivaVariantcalling(qscript) { override def namePrefix = "multisample" - override def configName = "shivavariantcalling" + override def configName: String = "shivavariantcalling" override def configPath: List[String] = super.configPath ::: "multisample" :: Nil } else new ShivaVariantcalling(qscript) { @@ -65,167 +49,29 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { } /** Method to make a sample */ - def makeSample(id: String) = new Sample(id) + override def makeSample(id: String) = new this.Sample(id) /** Class that will generate jobs for a sample */ - class Sample(sampleId: String) extends AbstractSample(sampleId) { - /** Sample specific files to add to summary */ - def summaryFiles: Map[String, File] = { - preProcessBam match { - case Some(b) => Map("preProcessBam" -> b) - case _ => Map() - } - } - - /** Sample specific stats to add to summary */ - def summaryStats: Map[String, Any] = Map() - + class Sample(sampleId: String) extends super.Sample(sampleId) { /** Method to make a library */ - def makeLibrary(id: String) = new Library(id) + override def makeLibrary(id: String) = new this.Library(id) /** Sample specific settings */ override def summarySettings = Map("single_sample_variantcalling" -> variantcalling.isDefined) /** Class to generate jobs for a library */ - class Library(libId: String) extends AbstractLibrary(libId) { - /** Library specific files to add to the summary */ - def summaryFiles: Map[String, File] = { - ((bamFile, preProcessBam) match { - case (Some(b), Some(pb)) => Map("bamFile" -> b, "preProcessBam" -> pb) - case (Some(b), _) => Map("bamFile" -> b, "preProcessBam" -> b) - case _ => Map() - }) ++ (inputR1.map("input_R1" -> _) :: - inputR2.map("input_R2" -> _) :: - inputBam.map("input_bam" -> _) :: Nil).flatten.toMap - } - - /** Library specific stats to add to summary */ - def summaryStats: Map[String, Any] = Map() - - /** Method to execute library preprocess */ - def preProcess(input: File): Option[File] = None - + class Library(libId: String) extends super.Library(libId) { /** Library specific settings */ override def summarySettings = Map("library_variantcalling" -> variantcalling.isDefined) - /** Method to make the mapping submodule */ - def makeMapping = { - val mapping = new Mapping(qscript) - mapping.sampleId = Some(sampleId) - mapping.libId = Some(libId) - mapping.outputDir = libDir - mapping.outputName = sampleId + "-" + libId - (Some(mapping), Some(mapping.finalBamFile), preProcess(mapping.finalBamFile)) - } - - def fileMustBeAbsulute(file: Option[File]): Option[File] = { - if 
(file.forall(_.isAbsolute)) file - else { - Logging.addError(s"$file for $sampleId / $libId should be a absolute file path") - file.map(_.getAbsoluteFile) - } - } - - lazy val inputR1: Option[File] = fileMustBeAbsulute(config("R1")) - lazy val inputR2: Option[File] = fileMustBeAbsulute(config("R2")) - lazy val inputBam: Option[File] = fileMustBeAbsulute(if (inputR1.isEmpty) config("bam") else None) - - lazy val (mapping, bamFile, preProcessBam): (Option[Mapping], Option[File], Option[File]) = - (inputR1.isDefined, inputBam.isDefined) match { - case (true, _) => makeMapping // Default starting from fastq files - case (false, true) => // Starting from bam file - config("bam_to_fastq", default = false).asBoolean match { - case true => makeMapping // bam file will be converted to fastq - case false => - val file = new File(libDir, sampleId + "-" + libId + ".final.bam") - (None, Some(file), preProcess(file)) - } - case _ => (None, None, None) - } - lazy val variantcalling = if (config("library_variantcalling", default = false).asBoolean && (bamFile.isDefined || preProcessBam.isDefined)) { Some(makeVariantcalling(multisample = false)) } else None /** This will add jobs for this library */ - def addJobs(): Unit = { - (inputR1.isDefined, inputBam.isDefined) match { - case (true, _) => mapping.foreach(mapping => { - mapping.input_R1 = inputR1.get - mapping.input_R2 = inputR2 - inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5")) - mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5"))) - }) - case (false, true) => { - inputFiles :+= new InputFile(inputBam.get, config("bam_md5")) - config("bam_to_fastq", default = false).asBoolean match { - case true => - val samToFastq = SamToFastq(qscript, inputBam.get, - new File(libDir, sampleId + "-" + libId + ".R1.fq.gz"), - new File(libDir, sampleId + "-" + libId + ".R2.fq.gz")) - samToFastq.isIntermediate = true - qscript.add(samToFastq) - mapping.foreach(mapping => { - mapping.input_R1 = samToFastq.fastqR1 - mapping.input_R2 = Some(samToFastq.fastqR2) - }) - case false => - val inputSam = SamReaderFactory.makeDefault.open(inputBam.get) - val readGroups = inputSam.getFileHeader.getReadGroups - - val readGroupOke = readGroups.forall(readGroup => { - if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") - readGroup.getSample == sampleId && readGroup.getLibrary == libId - }) - inputSam.close() - - if (!readGroupOke) { - if (config("correct_readgroups", default = false).asBoolean) { - logger.info("Correcting readgroups, file:" + inputBam.get) - val aorrg = AddOrReplaceReadGroups(qscript, inputBam.get, bamFile.get) - aorrg.RGID = sampleId + "-" + libId - aorrg.RGLB = libId - aorrg.RGSM = sampleId - aorrg.RGPL = "unknown" - aorrg.RGPU = "na" - aorrg.isIntermediate = true - qscript.add(aorrg) - } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + - "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") - } else { - val oldBamFile: File = inputBam.get - val oldIndex: File = new File(oldBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(libDir, bamFile.get.getName.stripSuffix(".bam") + ".bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(qscript, oldBamFile, bamFile.get) - bamLn.deps :+= baiLn.output 
- add(bamLn) - - val bamMetrics = new BamMetrics(qscript) - bamMetrics.sampleId = Some(sampleId) - bamMetrics.libId = Some(libId) - bamMetrics.inputBam = bamFile.get - bamMetrics.outputDir = new File(libDir, "metrics") - bamMetrics.init() - bamMetrics.biopetScript() - addAll(bamMetrics.functions) - addSummaryQScript(bamMetrics) - } - } - } - case _ => logger.warn("Sample: " + sampleId + " Library: " + libId + ", no reads found") - } - - mapping.foreach(mapping => { - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) // Add functions of mapping to curent function pool - addSummaryQScript(mapping) - }) + override def addJobs() = { + super.addJobs() variantcalling.foreach(vc => { vc.sampleId = Some(sampleId) @@ -233,75 +79,25 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { vc.outputDir = new File(libDir, "variantcalling") if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get) else vc.inputBams = Map(sampleId -> bamFile.get) - vc.init() - vc.biopetScript() - addAll(vc.functions) - addSummaryQScript(vc) + add(vc) }) } } - /** This will add jobs for the double preprocessing */ - protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = { - if (input == Nil) None - else if (input.tail == Nil) { - val bamFile = new File(sampleDir, s"$sampleId.bam") - val oldIndex: File = new File(input.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = new File(sampleDir, s"$sampleId.bai") - val baiLn = Ln(qscript, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(qscript, input.head, bamFile) - bamLn.deps :+= baiLn.output - add(bamLn) - Some(bamFile) - } else { - val md = new MarkDuplicates(qscript) - md.input = input - md.output = new File(sampleDir, sampleId + ".dedup.bam") - md.outputMetrics = new File(sampleDir, sampleId + ".dedup.metrics") - //FIXME: making this file intermediate make the pipeline restart unnessery jobs - //md.isIntermediate = isIntermediate - add(md) - addSummarizable(md, "mark_duplicates") - Some(md.output) - } - } - - lazy val preProcessBam: Option[File] = addDoublePreProcess(libraries.flatMap(lib => { - (lib._2.bamFile, lib._2.preProcessBam) match { - case (_, Some(file)) => Some(file) - case (Some(file), _) => Some(file) - case _ => None - } - }).toList) - lazy val variantcalling = if (config("single_sample_variantcalling", default = false).asBoolean) { Some(makeVariantcalling(multisample = false)) } else None /** This will add sample jobs */ - def addJobs(): Unit = { - addPerLibJobs() + override def addJobs(): Unit = { + super.addJobs() preProcessBam.foreach { bam => - val bamMetrics = new BamMetrics(qscript) - bamMetrics.sampleId = Some(sampleId) - bamMetrics.inputBam = bam - bamMetrics.outputDir = new File(sampleDir, "metrics") - bamMetrics.init() - bamMetrics.biopetScript() - addAll(bamMetrics.functions) - addSummaryQScript(bamMetrics) - variantcalling.foreach(vc => { vc.sampleId = Some(sampleId) vc.outputDir = new File(sampleDir, "variantcalling") vc.inputBams = Map(sampleId -> bam) - vc.init() - vc.biopetScript() - addAll(vc.functions) - addSummaryQScript(vc) + add(vc) }) } } @@ -321,32 +117,25 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { } else None /** This will add the mutisample variantcalling */ - def addMultiSampleJobs(): Unit = { + override def addMultiSampleJobs() = { + super.addMultiSampleJobs() + multisampleVariantCalling.foreach(vc => { vc.outputDir = new File(outputDir, "variantcalling") 
vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } - vc.init() - vc.biopetScript() - addAll(vc.functions) - addSummaryQScript(vc) + add(vc) annotation.foreach { toucan => toucan.outputDir = new File(outputDir, "annotation") toucan.inputVCF = vc.finalFile - toucan.init() - toucan.biopetScript() - addAll(toucan.functions) - addSummaryQScript(toucan) + add(toucan) } }) svCalling.foreach(sv => { sv.outputDir = new File(outputDir, "sv_calling") sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } - sv.init() - sv.biopetScript() - addAll(sv.functions) - addSummaryQScript(sv) + add(sv) }) } @@ -354,15 +143,11 @@ trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { def summaryFile = new File(outputDir, "Shiva.summary.json") /** Settings of pipeline for summary */ - def summarySettings = Map( - "reference" -> referenceSummary, + override def summarySettings = super.summarySettings ++ Map( "annotation" -> annotation.isDefined, "multisample_variantcalling" -> multisampleVariantCalling.isDefined, "sv_calling" -> svCalling.isDefined, "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) ) - - /** Files for the summary */ - def summaryFiles = Map("referenceFasta" -> referenceFasta()) } diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala new file mode 100644 index 0000000000000000000000000000000000000000..9ed3b00d01d42e21eec88ac3e3ccb5a03af4203d --- /dev/null +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala @@ -0,0 +1,64 @@ +package nl.lumc.sasc.biopet.pipelines.toucan + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesActivate, ManweSamplesImport } +import nl.lumc.sasc.biopet.utils.config.Configurable + +import scala.io.Source + +/** + * Created by ahbbollen on 9-10-15. 
+ * Wrapper for manwe activate after importing and annotating + */ +class ManweActivateAfterAnnotImport(root: Configurable, + annotate: ManweAnnotateVcf, + imported: ManweSamplesImport) extends ManweSamplesActivate(root) { + + override def beforeGraph: Unit = { + super.beforeGraph + require(annotate != null, "Annotate should be defined") + require(imported != null, "Imported should be defined") + this.deps :+= annotate.jobOutputFile + this.deps :+= imported.jobOutputFile + } + + override def beforeCmd: Unit = { + super.beforeCmd + + this.uri = getUri + } + + def getUriFromFile(f: File): String = { + val r = if (f.exists()) { + val reader = Source.fromFile(f) + val it = reader.getLines() + if (it.isEmpty) { + throw new IllegalArgumentException("Empty manwe stderr file") + } + it.filter(_.contains("Added sample")).toList.head.split(" ").last + } else { + "" + } + r + } + + def getUri: String = { + val err: Option[File] = Some(imported.jobOutputFile) + uri = err match { + case None => "" + case Some(s) => s match { + case null => "" + case other => getUriFromFile(other) + } + case _ => "" + } + + uri + } + + override def subCommand = { + required("samples") + required("activate") + getUri + } + +} diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala new file mode 100644 index 0000000000000000000000000000000000000000..e49ff7e5ed4d7ba91fd0bd1ef8b877bf58c3c296 --- /dev/null +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala @@ -0,0 +1,60 @@ +package nl.lumc.sasc.biopet.pipelines.toucan + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweDataSourcesDownload } +import nl.lumc.sasc.biopet.utils.config.Configurable + +import scala.io.Source + +/** + * Created by ahbbollen on 9-10-15. 
+ */ +class ManweDownloadAfterAnnotate(root: Configurable, + annotate: ManweAnnotateVcf) extends ManweDataSourcesDownload(root) { + + override def beforeGraph: Unit = { + super.beforeGraph + require(annotate != null, "Annotate should be defined") + this.deps :+= annotate.jobOutputFile + } + + override def beforeCmd: Unit = { + super.beforeCmd + + this.uri = getUri + } + + def getUriFromFile(f: File): String = { + val r = if (f.exists()) { + val reader = Source.fromFile(f) + val it = reader.getLines() + if (it.isEmpty) { + throw new IllegalArgumentException("Empty manwe stderr file") + } + it.filter(_.contains("Annotated VCF file")).toList.head.split(" ").last + } else { + "" + } + r + } + + def getUri: String = { + val err: Option[File] = Some(annotate.jobOutputFile) + uri = err match { + case None => "" + case Some(s) => s match { + case null => "" + case other => getUriFromFile(other) + } + case _ => "" + } + + uri + } + + override def subCommand = { + required("data-sources") + required("download") + getUri + } + +} diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 22d3f2be919b60836db6584a47ff536fb23b5072..7464366e69387664569796b1b523191abec7bb80 100644 --- a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -15,12 +15,15 @@ */ package nl.lumc.sasc.biopet.pipelines.toucan -import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand, Reference } -import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor -import nl.lumc.sasc.biopet.extensions.tools.{ VcfWithVcf, VepNormalizer } -import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView +import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsMerge } +import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesImport } +import nl.lumc.sasc.biopet.extensions.tools.{ GvcfToBed, VcfWithVcf, VepNormalizer } +import nl.lumc.sasc.biopet.extensions.{ Bgzip, Ln, VariantEffectPredictor } +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript /** @@ -34,19 +37,34 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum @Input(doc = "Input VCF file", shortName = "Input", required = true) var inputVCF: File = _ + @Input(doc = "Input GVCF file", shortName = "gvcf", required = false) + var inputGvcf: Option[File] = None + + var sampleIds: List[String] = Nil def init(): Unit = { inputFiles :+= new InputFile(inputVCF) + sampleIds = root match { + case m: MultiSampleQScript => m.samples.keys.toList + case null => VcfUtils.getSampleIds(inputVCF) + case s: SampleLibraryTag => s.sampleId.toList + case _ => throw new IllegalArgumentException("You don't have any samples") + } } override def defaults = Map( "varianteffectpredictor" -> Map("everything" -> true, "failed" -> 1, "allow_non_variant" -> true) ) - //defaults ++= Map("varianteffectpredictor" -> Map("everything" -> true)) - def biopetScript(): Unit = { + val doVarda: Boolean = config("use_varda", default = false) + val useVcf: File = if (doVarda) { + inputGvcf match { + case Some(s) => varda(inputVCF, s) + case _ => 
throw new IllegalArgumentException("You have not specified a GVCF file") + } + } else inputVCF val vep = new VariantEffectPredictor(this) - vep.input = inputVCF + vep.input = useVcf vep.output = new File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf") vep.isIntermediate = true add(vep) @@ -89,6 +107,112 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum addSummaryJobs() } + /** + * Performs the varda import and activate for one sample + * @param sampleID the sampleID to be used + * @param inputVcf the input VCF + * @param gVCF the gVCF for coverage + * @param annotation: ManweDownloadAnnotateVcf object of annotated vcf + * @return + */ + def importAndActivateSample(sampleID: String, inputVcf: File, + gVCF: File, annotation: ManweAnnotateVcf): ManweActivateAfterAnnotImport = { + + val minGQ: Int = config("minimum_genome_quality", default = 20, submodule = "manwe") + val isPublic: Boolean = config("varda_is_public", default = true, submodule = "manwe") + + val bedTrack = new GvcfToBed(this) + bedTrack.inputVcf = gVCF + bedTrack.outputBed = swapExt(outputDir, gVCF, ".vcf.gz", s""".$sampleID.bed""") + bedTrack.minQuality = minGQ + bedTrack.isIntermediate = true + bedTrack.sample = Some(sampleID) + add(bedTrack) + + val mergedBed = new BedtoolsMerge(this) + mergedBed.input = bedTrack.outputBed + mergedBed.dist = 5 + mergedBed.output = swapExt(outputDir, bedTrack.outputBed, ".bed", ".merged.bed") + add(mergedBed) + + val bgzippedBed = new Bgzip(this) + bgzippedBed.input = List(mergedBed.output) + bgzippedBed.output = swapExt(outputDir, mergedBed.output, ".bed", ".bed.gz") + add(bgzippedBed) + + val singleVcf = new BcftoolsView(this) + singleVcf.input = inputVCF + singleVcf.output = swapExt(outputDir, inputVCF, ".vcf.gz", s""".$sampleID.vcf.gz""") + singleVcf.samples = List(sampleID) + singleVcf.minAC = Some(1) + singleVcf.isIntermediate = true + add(singleVcf) + + val intersected = new BedtoolsIntersect(this) + intersected.input = singleVcf.output + intersected.intersectFile = bgzippedBed.output + intersected.output = swapExt(outputDir, singleVcf.output, ".vcf.gz", ".intersected.vcf") + add(intersected) + + val bgzippedIntersect = new Bgzip(this) + bgzippedIntersect.input = List(intersected.output) + bgzippedIntersect.output = swapExt(outputDir, intersected.output, ".vcf", ".vcf.gz") + add(bgzippedIntersect) + + val imported = new ManweSamplesImport(this) + imported.vcfs = List(bgzippedIntersect.output) + imported.beds = List(bgzippedBed.output) + imported.name = Some(sampleID) + imported.public = isPublic + imported.waitToComplete = false + imported.isIntermediate = true + imported.output = swapExt(outputDir, intersected.output, ".vcf.gz", ".manwe.import") + add(imported) + + val active = new ManweActivateAfterAnnotImport(this, annotation, imported) + active.output = swapExt(outputDir, imported.output, ".import", ".activated") + add(active) + active + + } + + /** + * Perform varda analysis + * @param vcf input vcf + * @param gVcf The gVCF to be used for coverage calculations + * @return return vcf + */ + def varda(vcf: File, gVcf: File): File = { + + val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), submodule = "manwe") + //TODO: add groups!!! 
Need sample-specific group tags for this + + val annotate = new ManweAnnotateVcf(this) + annotate.vcf = vcf + if (annotationQueries.nonEmpty) { + annotate.queries = annotationQueries + } + annotate.waitToComplete = true + annotate.output = swapExt(outputDir, vcf, ".vcf.gz", ".manwe.annot") + annotate.isIntermediate = true + add(annotate) + + val annotatedVcf = new ManweDownloadAfterAnnotate(this, annotate) + annotatedVcf.output = swapExt(outputDir, annotate.output, ".manwe.annot", "manwe.annot.vcf.gz") + add(annotatedVcf) + + val activates = sampleIds map { x => importAndActivateSample(x, vcf, gVcf, annotate) } + + val finalLn = new Ln(this) + activates.foreach(x => finalLn.deps :+= x.output) + finalLn.input = annotatedVcf.output + finalLn.output = swapExt(outputDir, annotatedVcf.output, "manwe.annot.vcf.gz", ".varda_annotated.vcf.gz") + finalLn.relative = true + add(finalLn) + + finalLn.output + } + def summaryFile = new File(outputDir, "Toucan.summary.json") def summaryFiles = Map()
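Throughout the Shiva and Toucan changes above, the repeated four-call pattern (vc.init(); vc.biopetScript(); addAll(vc.functions); addSummaryQScript(vc)) is collapsed into a single add(vc). The helper itself is not part of this excerpt (it presumably lands in biopet-core alongside this change), so the trait below is only a sketch of what it is expected to do, inferred from the call sites it replaces; the trait name SubPipelineSupport and the exact self-type are assumptions.

import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.core.summary.SummaryQScript

// Minimal sketch, assuming add(subPipeline) simply bundles the four calls it replaces.
trait SubPipelineSupport { self: QScript with SummaryQScript =>
  def add(subPipeline: QScript with SummaryQScript): Unit = {
    subPipeline.init()              // resolve the sub-pipeline's config and inputs
    subPipeline.biopetScript()      // let it build its own job graph
    addAll(subPipeline.functions)   // merge its jobs into this parent QScript
    addSummaryQScript(subPipeline)  // register it for the summary output
  }
}

Collapsing the boilerplate into one call keeps init(), biopetScript() and summary registration from drifting apart at individual call sites, which is the apparent motivation for the rewrite.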
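Both new wrappers, ManweActivateAfterAnnotImport and ManweDownloadAfterAnnotate, recover a Varda URI by scanning the captured output of the upstream manwe job for a marker line ("Added sample" and "Annotated VCF file" respectively) and taking its last whitespace-separated token. The object below isolates that parsing so it can be read and tested on its own; the example log line quoted afterwards is an assumption about manwe's output format, not something shown in this diff.

import java.io.File
import scala.io.Source

object ManweUriParsing {
  /** Last token of the first line containing `marker`; empty string if the file does not exist. */
  def lastTokenOf(logFile: File, marker: String): String =
    if (!logFile.exists()) ""
    else {
      val src = Source.fromFile(logFile)
      try {
        val lines = src.getLines().toList
        if (lines.isEmpty) throw new IllegalArgumentException("Empty manwe stderr file")
        lines.filter(_.contains(marker)).head.split(" ").last
      } finally src.close()
    }
}

Assuming the import job logged a hypothetical line such as "Added sample /samples/23", lastTokenOf(importLog, "Added sample") would return "/samples/23"; the download wrapper does the same with the "Annotated VCF file" marker. Like the code in the diff, this only throws for a completely empty log file; a file that exists but lacks the marker line fails later with a NoSuchElementException from head.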
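In Toucan's biopetScript() the VEP input is now gated on use_varda: with varda disabled the input VCF is annotated directly, with varda enabled the output of varda(inputVCF, gvcf) is used instead, and enabling varda without a GVCF is a hard error. The function below restates that three-way decision as a small pure function, purely for documentation; resolveInputForVep and its runVarda parameter are names invented for this sketch.

import java.io.File

object VardaGate {
  // Mirrors the use_varda decision in Toucan.biopetScript() (illustration only).
  def resolveInputForVep(doVarda: Boolean,
                         inputVcf: File,
                         inputGvcf: Option[File],
                         runVarda: (File, File) => File): File =
    (doVarda, inputGvcf) match {
      case (false, _)      => inputVcf              // varda disabled: annotate the input VCF as-is
      case (true, Some(g)) => runVarda(inputVcf, g) // varda enabled: import/activate first, annotate its output
      case (true, None)    => throw new IllegalArgumentException("You have not specified a GVCF file")
    }
}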
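The varda path is driven entirely by configuration: use_varda at the pipeline level, plus minimum_genome_quality, varda_is_public and annotation_queries read with submodule = "manwe". The map below only lists those keys with the defaults visible in the diff; representing the submodule as a nested "manwe" map is an assumption about how the JSON config would be laid out, not something this diff shows.

object VardaConfigExample {
  // Keys and defaults as read in Toucan.scala; the nesting under "manwe" is assumed.
  val vardaSettings: Map[String, Any] = Map(
    "use_varda" -> true, // default false; switches biopetScript() onto the varda branch
    "manwe" -> Map(
      "minimum_genome_quality" -> 20,          // minimum GQ for GvcfToBed per sample
      "varda_is_public" -> true,               // forwarded to ManweSamplesImport.public
      "annotation_queries" -> List("GLOBAL *") // forwarded to ManweAnnotateVcf.queries
    )
  )
}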