diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index 1e00d04cc24b86fa1f1df1cecbfe7bfc12c482aa..01fd32ab2aef9de5a55d8e726fd4334980888b32 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -39,7 +39,6 @@ class BamMetrics(val root: Configurable) extends QScript var inputBam: File = _ /** Settings for CollectRnaSeqMetrics */ - var rnaMetricsSettings: Map[String, String] = Map() var transcriptRefFlatFile: Option[File] = config("transcript_refflat") /** return location of summary file */ @@ -107,8 +106,6 @@ class BamMetrics(val root: Configurable) extends QScript rnaMetrics.output = swapExt(outputDir, inputBam, ".bam", ".rna.metrics") rnaMetrics.chartOutput = Some(swapExt(outputDir, inputBam, ".bam", ".rna.metrics.pdf")) rnaMetrics.refFlat = transcriptRefFlatFile.get - rnaMetrics.ribosomalIntervals = rnaMetricsSettings.get("ribosomal_intervals").collect { case n => new File(n) } - rnaMetrics.strandSpecificity = rnaMetricsSettings.get("strand_specificity") add(rnaMetrics) addSummarizable(rnaMetrics, "rna") } diff --git a/public/biopet-extensions/pom.xml b/public/biopet-extensions/pom.xml index d4483082548a15b58eec526eff896f18d9a558db..26cc102b5df164ac6fc4df51fe89eb33b87ffe27 100644 --- a/public/biopet-extensions/pom.xml +++ b/public/biopet-extensions/pom.xml @@ -46,6 +46,13 @@ <version>2.2.1</version> <scope>test</scope> </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-all</artifactId> + <version>1.9.5</version> + <scope>test</scope> + </dependency> + </dependencies> </project> \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala 
b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4bb87d332e15abc0e93f71f43e2329eb574e95ca
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/BcftoolsView.scala
@@ -0,0 +1,184 @@
+package nl.lumc.sasc.biopet.extensions.bcftools
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
+
+/**
+ * Created by ahbbollen on 12-10-15.
+ */
+class BcftoolsView(val root: Configurable) extends Bcftools { // builds the command line for `bcftools view`
+
+  @Input(doc = "Input VCF file")
+  var input: File = _
+
+  @Output(doc = "Output file")
+  var output: File = _
+
+  @Argument(doc = "drop individual genotypes", required = false)
+  var dropGenotype: Boolean = config("drop_genotype", default = false)
+
+  @Argument(doc = "header only", required = false)
+  var headerOnly: Boolean = config("header_only", false)
+
+  @Argument(doc = "Compression level", required = false)
+  var compressionLevel: Int = config("compression_level", default = 9)
+
+  @Argument(doc = "output type", required = false)
+  var outputType: String = "z"
+
+  @Argument(doc = "regions", required = false)
+  var regions: Option[String] = config("r")
+
+  @Argument(doc = "region file", required = false)
+  var regionFile: Option[File] = config("R")
+
+  @Argument(doc = "targets", required = false)
+  var targets: Option[String] = config("t")
+
+  @Argument(doc = "targets file", required = false)
+  var targetFile: Option[File] = config("T")
+
+  @Argument(doc = "trim alt alleles", required = false)
+  var trimAltAlleles: Boolean = config("trim_alt_allele", default = false)
+
+  @Argument(doc = "no update", required = false)
+  var noUpdate: Boolean = config("no_update", default = false)
+
+  @Argument(doc = "samples", required = false)
+  var samples: List[String] = config("s", default = Nil)
+
+  @Argument(doc = "samples file", required = false)
+  var sampleFile: Option[File] = config("S")
+
+  @Argument(doc = "minimum allele count", required = false)
+  var minAC: Option[Int] = config("c")
+
+  @Argument(doc = "max allele count", required = false)
+  var maxAC: Option[Int] = config("C")
+
+  @Argument(doc = "exclude (expression)", required = false)
+  var exclude: Option[String] = config("e")
+
+  @Argument(doc = "apply filters", required = false)
+  var applyFilters: List[String] = config("F", default = Nil)
+
+  @Argument(doc = "genotype", required = false)
+  var genotype: Option[String] = config("g")
+
+  @Argument(doc = "include (expression)", required = false)
+  var include: Option[String] = config("i")
+
+  @Argument(doc = "Known (ID field is not .) only", required = false)
+  var known: Boolean = config("k", default = false)
+
+  @Argument(doc = "min alleles", required = false)
+  var minAlleles: Option[Int] = config("m")
+
+  @Argument(doc = "max alleles", required = false)
+  var maxAlleles: Option[Int] = config("M")
+
+  @Argument(doc = "novel (ID field is .) only", required = false)
+  var novel: Boolean = config("n", false)
+
+  @Argument(doc = "phased only", required = false)
+  var phased: Boolean = config("p", false)
+
+  @Argument(doc = "exclude phased (only)", required = false)
+  var excludePhased: Boolean = config("P", false)
+
+  @Argument(doc = "min allele frequency", required = false)
+  var minAF: Option[Int] = config("q") // NOTE(review): bcftools takes a fractional frequency here; Int looks too narrow - confirm
+
+  @Argument(doc = "max allele frequency", required = false)
+  var maxAF: Option[Int] = config("Q") // NOTE(review): same concern as minAF - confirm
+
+  @Argument(doc = "uncalled only", required = false)
+  var uncalled: Boolean = config("u", default = false)
+
+  @Argument(doc = "exclude uncalled (only)", required = false)
+  var excludeUncalled: Boolean = config("U", default = false)
+
+  @Argument(doc = "types", required = false)
+  var types: Option[String] = config("v")
+
+  @Argument(doc = "exclude types", required = false)
+  var excludeTypes: Option[String] = config("V")
+
+  @Argument(doc = "private (requires samples)", required = false)
+  var onlyPrivate: Boolean = config("x", default = false)
+
+  @Argument(doc = "Exclude privates", required = false)
+  var excludePrivate: Boolean = config("X", default = false)
+
+  override def beforeGraph() = { // validates settings; throws IllegalArgumentException on a bad value
+    super.beforeGraph()
+
+    require((compressionLevel <= 9) && (compressionLevel >= 0), s"compression level must be within 0-9, got $compressionLevel")
+    require(
+      (outputType.length == 1) &&
+        (outputType == "z" || outputType == "b" || outputType == "u" || outputType == "v"),
+      s"output type must be one of b, u, v or z, got '$outputType'")
+  }
+
+  def baseCmd = { // everything except the input/output part, shared by the three command variants below
+    executable +
+      required("view") +
+      conditional(dropGenotype, "-G") +
+      conditional(headerOnly, "-h") +
+      required("-l", compressionLevel) +
+      required("-O", outputType) +
+      optional("-r", regions) +
+      optional("-R", regionFile) +
+      optional("-t", targets) +
+      optional("-T", targetFile) +
+      conditional(trimAltAlleles, "-a") +
+      conditional(noUpdate, "-I") +
+      repeat("-s", samples) +
+      optional("-S", sampleFile) +
+      optional("-c", minAC) +
+      optional("-C", maxAC) +
+      optional("-e", exclude) +
+      optional("-f", if (applyFilters.isEmpty) None else Some(applyFilters.mkString(","))) + // one comma-separated value
+      optional("-g", genotype) +
+      optional("-i", include) +
+      conditional(known, "-k") +
+      optional("-m", minAlleles) +
+      optional("-M", maxAlleles) +
+      conditional(novel, "-n") +
+      conditional(phased, "-p") +
+      conditional(excludePhased, "-P") +
+      optional("-q", minAF) +
+      optional("-Q", maxAF) +
+      conditional(uncalled, "-u") +
+      conditional(excludeUncalled, "-U") +
+      optional("-v", types) +
+      optional("-V", excludeTypes) +
+      conditional(onlyPrivate, "-x") +
+      conditional(excludePrivate, "-X")
+  }
+
+  def cmdPipeInput = { // variant that reads the VCF from stdin ("-")
+    baseCmd + "-"
+  }
+
+  def cmdPipe = { // variant that reads `input` and has no -o option
+    baseCmd + required(input)
+  }
+
+  def cmdLine = { // variant that reads `input` and writes to `output`
+    baseCmd + required("-o", output) + required(input)
+  }
+
+  /**
+   * Convert cmdLine into line without quotes and double spaces
+   * primarily for testing
+   * @return
+   */
+  final def cmd = {
+    val a = cmdLine
+    a.replace("'", "").replaceAll(" +", " ").trim // collapse every run of spaces into a single space
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f72411a6ab7b83c7661d4f36ec38d12bca3136c1
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala
@@ -0,0 +1,28 @@
+package nl.lumc.sasc.biopet.extensions.bedtools
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input }
+
+/**
+ * Created by ahbbollen on 5-1-16.
+ */
+class BedtoolsMerge(val root: Configurable) extends Bedtools { // builds the command line for `bedtools merge`
+
+  @Input(doc = "Input bed file")
+  var input: File = _
+
+  @Argument(doc = "Distance", required = false) // optional: -d is only emitted when a value is configured
+  var dist: Option[Int] = config("dist") // bedtools merge itself defaults to 0 when -d is omitted
+
+  @Output(doc = "Output bed file")
+  var output: File = _
+
+  def cmdLine = { // bedtools writes to stdout, so the result is redirected into `output`
+    required(executable) + required("merge") +
+      required("-i", input) + optional("-d", dist) +
+      " > " + required(output)
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala
index 26a3175612724b3594cf865a31bcdbe201a6e985..793fd7d159a64c5f432afd575d5a04b7d4fa7c09 100644
--- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bowtie/Bowtie2.scala
@@ -139,6 +139,7 @@ class Bowtie2(val root: Configurable) extends BiopetCommandLineFunction with Ref
         Logging.addError(s"No index files found for bowtie2 in: $indexDir with basename: $basename")
     }
   }
+
   /** return commandline to execute */
   def cmdLine = required(executable) +
     conditional(q, "-q") +
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala
new file mode 100644
index 0000000000000000000000000000000000000000..e2566088d254a9e467d1a00a90e477e431c6d534
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/Manwe.scala
@@ -0,0 +1,83 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.{ PrintWriter, File }
+
+import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
+import org.broadinstitute.gatk.utils.commandline.{ Output, Argument }
+
+/**
+ * Created by ahbbollen on 23-9-15.
+ * This is python, but not accessed like a script; i.e. 
called by simply
+ * manwe [subcommand]
+ */
+abstract class Manwe extends BiopetCommandLineFunction {
+  executable = config("exe", default = "manwe", submodule = "manwe")
+
+  var manweConfig: File = createManweConfig(None) // a config file is already written when the instance is constructed
+
+  @Output(doc = "the output file")
+  var output: File = _
+
+  var manweHelp: Boolean = false
+
+  def subCommand: String // supplied by each concrete wrapper: sub-command words plus their own options
+
+  final def cmdLine = { // note: every call writes a fresh config file next to `output`
+    manweConfig = createManweConfig(Option(output).map(_.getParentFile))
+    required(executable) +
+      subCommand +
+      required("-c", manweConfig) +
+      conditional(manweHelp, "-h") +
+      " > " +
+      required(output)
+
+  }
+
+  /**
+   * Convert cmdLine into line without quotes and double spaces
+   * primarily for testing
+   * @return
+   */
+  final def cmd = {
+    val a = cmdLine
+    a.replace("'", "").replaceAll(" +", " ").trim // collapse every run of spaces into a single space
+  }
+
+  /**
+   * Create Manwe config from biopet config
+   * @return Manwe config file
+   */
+  def createManweConfig(directory: Option[File]): File = {
+    val url: String = config("varda_root")
+    val token: String = config("varda_token")
+    val sslSettings: Option[String] = config("varda_verify_certificate")
+    val collectionCacheSize: Option[Int] = config("varda_cache_size", default = 20)
+    val dataBufferSize: Option[Int] = config("varda_buffer_size", default = 1024 * 1024)
+    val taskPollWait: Option[Int] = config("varda_task_poll_wait", default = 2)
+
+    val settingsMap: Map[String, Any] = Map(
+      "API_ROOT" -> s"'$url'",
+      "TOKEN" -> s"'$token'",
+      "VERIFY_CERTIFICATE" -> (sslSettings match { // "true"/"false" become Python booleans, anything else a quoted string
+        case Some("true")  => "True"
+        case Some("false") => "False"
+        case Some(x)       => s"'$x'"
+        case _             => "True"
+      }),
+      "COLLECTION_CACHE_SIZE" -> collectionCacheSize.getOrElse(20),
+      "DATA_BUFFER_SIZE" -> dataBufferSize.getOrElse(1048576),
+      "TASK_POLL_WAIT" -> taskPollWait.getOrElse(2)
+    )
+
+    val file = directory match { // without a directory the JVM default temp location is used
+      case Some(dir) => File.createTempFile("manwe_config", ".py", dir)
+      case None      => File.createTempFile("manwe_config", ".py")
+    }
+
+    file.deleteOnExit()
+    val writer = new PrintWriter(file)
+    // always release the file handle, also when writing one of the settings throws
+    try settingsMap.foreach { case (key, value) => writer.println(s"$key = $value") } finally writer.close()
+    file
+  }
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f3c3fbb1e22a019a18b7eb8caa6d262e2f3347f8
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateBed.scala
@@ -0,0 +1,32 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweAnnotateBed(val root: Configurable) extends Manwe { // builds `manwe annotate-bed`
+
+  @Input(doc = "the bed to annotate")
+  var bed: File = _
+
+  @Argument(doc = "flag if data has already been uploaded")
+  var alreadyUploaded: Boolean = false
+
+  @Argument(doc = "Flag whether to wait for annotation to complete on the server")
+  var waitToComplete: Boolean = false
+
+  @Argument(doc = "annotation queries", required = false)
+  var queries: List[String] = Nil
+
+  def subCommand = {
+    required("annotate-bed") + required(bed) +
+      conditional(alreadyUploaded, "-u") +
+      repeat("-q", queries) +
+      conditional(waitToComplete, "--wait")
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala
new file mode 100644
index 0000000000000000000000000000000000000000..64a849a536d4695facbfd290379426fd3d646287
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweAnnotateVcf.scala
@@ -0,0 +1,32 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import 
nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Input }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweAnnotateVcf(val root: Configurable) extends Manwe { // builds `manwe annotate-vcf`
+
+  @Input(doc = "the vcf to annotate")
+  var vcf: File = _
+
+  @Argument(doc = "flag if data has already been uploaded")
+  var alreadyUploaded: Boolean = false
+
+  @Argument(doc = "flag whether to wait for annotation to complete")
+  var waitToComplete: Boolean = false
+
+  @Argument(doc = "annotation queries", required = false)
+  var queries: List[String] = Nil
+
+  def subCommand = {
+    val target = required("annotate-vcf") + required(vcf) // sub-command word followed by the file to annotate
+    val uploadFlag = conditional(alreadyUploaded, "-u")
+    val queryArgs = repeat("-q", queries) // one -q per query
+    target + uploadFlag + queryArgs + conditional(waitToComplete, "--wait")
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala
new file mode 100644
index 0000000000000000000000000000000000000000..0980b4c23d4450c95b17baa8c1d2f52bfb6a63d5
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesAnnotate.scala
@@ -0,0 +1,27 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.Argument
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweDataSourcesAnnotate(val root: Configurable) extends Manwe { // builds `manwe data-sources annotate`
+
+  @Argument(doc = "uri to data source to annotate")
+  var uri: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  @Argument(doc = "list of queries", required = false)
+  var queries: List[String] = Nil
+
+  @Argument(doc = "Flag whether to wait for annotation to complete on server")
+  var waitToComplete: Boolean = false
+
+  def subCommand = {
+    required("data-sources") + required("annotate") +
+      required(uri) +
+      repeat("-q", queries) +
+      conditional(waitToComplete, "--wait")
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d55dd6a4f8c8c181bd565c60d085b938a08fe4a0
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesDownload.scala
@@ -0,0 +1,19 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.Argument
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweDataSourcesDownload(val root: Configurable) extends Manwe { // builds `manwe data-sources download`
+
+  @Argument(doc = "uri to data source to download")
+  var uri: String = _
+
+  def subCommand = {
+    required("data-sources") +
+      required("download") +
+      required(uri)
+  }
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala
new file mode 100644
index 0000000000000000000000000000000000000000..dbcb7b46913e7bb66ba41d74bc141e3ed861a0b0
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesList.scala
@@ -0,0 +1,23 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweDataSourcesList(val root: Configurable) extends Manwe { // builds `manwe data-sources list`
+
+  @Argument(doc = "User uri to filter by", required = false) // a filter, so listing without it must stay possible
+  var user: Option[String] = None // None instead of a null Option: no -u is emitted until a user is set
+
+  def subCommand = {
+    required("data-sources") +
+      required("list") +
+      optional("-u", user)
+  }
+
+}
+
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala
new file mode 100644
index 0000000000000000000000000000000000000000..662bb51b79e292a352d524ca2437dd953737d894
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweDataSourcesShow.scala
@@ -0,0 +1,22 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweDataSourcesShow(val root: Configurable) extends Manwe { // builds `manwe data-sources show`
+
+  @Argument(doc = "uri of data source")
+  var uri: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  def subCommand = {
+    required("data-sources") +
+      required("show") +
+      required(uri)
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3815d64a258dd52b354bb2be2be1610d6eb2d4cb
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesActivate.scala
@@ -0,0 +1,22 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweSamplesActivate(val root: Configurable) extends Manwe { // builds `manwe samples activate`
+
+  @Argument(doc = "uri to sample to activate")
+  var uri: String = _
+
+  def subCommand = {
+    required("samples") +
+      required("activate") +
+      required(uri)
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala
new file mode 100644
index 0000000000000000000000000000000000000000..fc5cababd9c141208b2a8ec945895babaac0dc4a
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAdd.scala
@@ -0,0 +1,30 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweSamplesAdd(val root: Configurable) extends Manwe { // builds `manwe samples add`
+
+  @Argument(doc = "the sample name")
+  var name: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  @Argument(doc = "the sample groups [uris]", required = false)
+  var group: List[String] = Nil
+
+  @Argument(doc = "pool size", required = false) // emitted through optional("-s", ...), so it may stay unset
+  var poolSize: Option[Int] = None
+
+  def subCommand = {
+    required("samples") +
+      required("add") +
+      required(name) +
+      optional("-s", poolSize) +
+      repeat("-g", group)
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala
new file mode 100644
index 0000000000000000000000000000000000000000..04092cc14b1cd39fd8d8a006c77dfadfa9163627
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesAnnotateVariations.scala
@@ -0,0 +1,26 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweSamplesAnnotateVariations(val root: Configurable) extends Manwe { // builds `manwe samples annotate-variations`
+
+  @Argument(doc = "uri to sample to annotate")
+  var uri: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  @Argument(doc = "Annotation queries", required = false)
+  var queries: List[String] = Nil
+
+  def subCommand = {
+    required("samples") +
+      required("annotate-variations") +
+      required(uri) +
+      repeat("-q", queries)
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala
new file mode 100644
index 0000000000000000000000000000000000000000..e683f2c2a7d8e31481becf2d9ed4b6567efc744f
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImport.scala
@@ -0,0 +1,62 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Input, Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweSamplesImport(val root: Configurable) extends Manwe { // builds `manwe samples import`
+
+  /**
+   * Creates sample and imports vcf and bed files immediately
+   */
+
+  @Argument(doc = "name of sample", required = true)
+  var name: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  @Argument(doc = "Group uris", required = false)
+  var group: List[String] = Nil
+
+  @Input(doc = "Vcf files to upload", required = false)
+  var vcfs: List[File] = Nil
+
+  @Input(doc = "BED files to upload", required = false)
+  var beds: List[File] = Nil
+
+  @Argument(doc = "flag for data already uploaded", required = false)
+  var alreadyUploaded: Boolean = false
+
+  @Argument(doc = "flag to mark sample as public", required = false)
+  var public: Boolean = false
+
+  @Argument(doc = "flag if sample has no coverage profile", required = false)
+  var noCoverage: Boolean = false
+
+  @Argument(doc = "Prefer genotypes derived from likelihood (PL) fields in stead of GT field", required = false)
+  var preferLikelihood: Boolean = false
+
+  @Argument(doc = "Pool size", required = false)
+  var poolSize: Option[Int] = None // None instead of a null Option: no -s is emitted until a size is set
+
+  @Argument(doc = " Flag whether to wait for import to complete on server", required = false)
+  var waitToComplete: Boolean = false
+
+  def subCommand = {
+    required("samples") +
+      required("import") +
+      required(name) +
+      repeat("-g", group) +
+      repeat("--vcf", vcfs) +
+      repeat("--bed", beds) +
+      optional("-s", poolSize) +
+      conditional(alreadyUploaded, "-u") +
+      conditional(public, "-p") +
+      conditional(preferLikelihood, "-l") +
+      conditional(noCoverage, "--no-coverage-profile") +
+      conditional(waitToComplete, "--wait")
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9eb3f12d0974d95af940b0e34f3b930fd06ba0a1
--- /dev/null
+++ 
b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportBed.scala
@@ -0,0 +1,38 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweSamplesImportBed(val root: Configurable) extends Manwe { // builds `manwe samples import-bed`
+
+  /**
+   * Import bed for existing sample
+   */
+
+  @Argument(doc = "uri of sample to upload to")
+  var uri: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  @Argument(doc = "path to BED file to upload") // NOTE(review): a file input declared as @Argument, not @Input - confirm intent
+  var bed: File = _
+
+  @Argument(doc = "flag if data is already uploaded?") // TODO: What is the use of this flag even? We're specifically uploading with this command
+  var alreadyUploaded: Boolean = false
+
+  @Argument(doc = " Flag whether to wait for import to complete on server")
+  var waitToComplete: Boolean = false
+
+  def subCommand = {
+    required("samples") +
+      required("import-bed") +
+      required(uri) +
+      required(bed) +
+      conditional(alreadyUploaded, "-u") +
+      conditional(waitToComplete, "--wait")
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala
new file mode 100644
index 0000000000000000000000000000000000000000..61f6a5f223b4af57d6066ae8f80cb0164cb8cd53
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesImportVcf.scala
@@ -0,0 +1,41 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweSamplesImportVcf(val root: Configurable) extends Manwe { // builds `manwe samples import-vcf`
+
+  /**
+   * Import vcf for existing sample
+   */
+
+  @Argument(doc = "uri of sample to upload to")
+  var uri: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  @Argument(doc = "path to VCF file to upload") // NOTE(review): a file input declared as @Argument, not @Input - confirm intent
+  var vcf: File = _
+
+  @Argument(doc = "flag if data is already uploaded?") // TODO: What is the use of this flag even? We're specifically uploading with this command
+  var alreadyUploaded: Boolean = false
+
+  @Argument(doc = "Flag when to prefer genotype likelihoods")
+  var preferLikelihoods: Boolean = false
+
+  @Argument(doc = " Flag whether to wait for import to complete on server")
+  var waitToComplete: Boolean = false
+
+  def subCommand = {
+    required("samples") +
+      required("import-vcf") +
+      required(uri) + required(vcf) +
+      conditional(alreadyUploaded, "-u") +
+      conditional(preferLikelihoods, "-l") +
+      conditional(waitToComplete, "--wait")
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d79b85fe1048210830d532091e4d588dac9c96d4
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesList.scala
@@ -0,0 +1,29 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 23-9-15.
+ */
+class ManweSamplesList(val root: Configurable) extends Manwe { // builds `manwe samples list`
+
+  @Argument(doc = "filter by user URI", required = false)
+  var user: Option[String] = None
+
+  @Argument(doc = "filter by group URI", required = false)
+  var group: List[String] = Nil
+
+  var onlyPublic: Boolean = false
+
+  def subCommand = {
+    // the fixed sub-command words come first, the optional filters follow in the same order as before
+    val action = required("samples") + required("list")
+    val userFilter = optional("-u", user)
+    val groupFilter = repeat("-g", group)
+    action + userFilter + groupFilter + conditional(onlyPublic, "-p")
+  }
+
+}
diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala
new file mode 100644
index 0000000000000000000000000000000000000000..6a73f84a44c44505509fe993a6fbca92da393b66
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/manwe/ManweSamplesShow.scala
@@ -0,0 +1,22 @@
+package nl.lumc.sasc.biopet.extensions.manwe
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output }
+
+/**
+ * Created by ahbbollen on 24-9-15.
+ */
+class ManweSamplesShow(val root: Configurable) extends Manwe { // builds `manwe samples show`
+
+  @Argument(doc = "The sample to show")
+  var uri: Option[String] = None // None instead of a null Option; still has to be set before the job runs
+
+  def subCommand = {
+    required("samples") +
+      required("show") +
+      required(uri)
+  }
+
+}
diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ff7ce95d59adcce87590605b5c5f8239f83af50c
--- /dev/null
+++ b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/BcfToolsTest.scala
@@ -0,0 +1,35 @@
+package nl.lumc.sasc.biopet.extensions
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView
+import org.scalatest.Matchers
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.testng.TestNGSuite
+import org.testng.annotations.Test
+
+/**
+ * Created by ahbbollen on 12-10-15.
+ */
+class BcfToolsTest extends TestNGSuite with Matchers with MockitoSugar {
+
+  @Test
+  def BcfToolsViewTest = { // checks the command line built when only input and output are set
+    val view = new BcftoolsView(null)
+
+    view.executable = "bcftools"
+
+    val tmpInput = File.createTempFile("bcftoolstest", ".vcf")
+    tmpInput.deleteOnExit()
+    val tmpOutput = File.createTempFile("bcftoolstest", ".vcf.gz")
+    tmpOutput.deleteOnExit()
+    val inputPath = tmpInput.getAbsolutePath
+    val outputPath = tmpOutput.getAbsolutePath
+
+    view.input = tmpInput
+    view.output = tmpOutput
+
+    view.cmd should equal(s"bcftools view -l 9 -O z -o $outputPath $inputPath")
+  }
+
+}
diff --git a/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..6d1cf3d60c7f2b0d995b9b82b59a1391d37162db
--- /dev/null
+++ b/public/biopet-extensions/src/test/scala/nl/lumc/sasc/biopet/extensions/ManweTest.scala
@@ -0,0 
+1,372 @@
+package nl.lumc.sasc.biopet.extensions
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.extensions.manwe._
+import nl.lumc.sasc.biopet.utils.config.Config
+import org.scalatest.Matchers
+import org.scalatest.testng.TestNGSuite
+import org.testng.annotations.Test
+
+import scala.io.Source
+
+/**
+ * Created by ahbbollen on 24-9-15. Checks the command line each Manwe wrapper builds from its fields.
+ */
+class ManweTest extends TestNGSuite with Matchers {
+
+  @Test
+  def testManweAnnotatedBed = {
+    val manwe = new ManweAnnotateBed(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}")) // NOTE(review): no s-interpolator, so the ${...} text is the literal config value; manweConfig is assigned directly below
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    val bed = File.createTempFile("manwe", "bed")
+    out.deleteOnExit()
+    bed.deleteOnExit()
+
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.bed = bed
+    manwe.alreadyUploaded = false
+    manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.queries = List("/uri/1&&/uri/2")
+    manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.alreadyUploaded = true
+    manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -u -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.waitToComplete = true
+    manwe.cmd should equal(s"manwe annotate-bed ${bed.getAbsolutePath} -u -q /uri/1&&/uri/2 --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweAnnotateVcf = {
+    val manwe = new ManweAnnotateVcf(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    val vcf = File.createTempFile("manwe", "vcf")
+    out.deleteOnExit()
+    vcf.deleteOnExit()
+
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.vcf = vcf
+    manwe.alreadyUploaded = false
+    manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.queries = List("/uri/1&&/uri/2")
+    manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.alreadyUploaded = true
+    manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -u -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.waitToComplete = true
+    manwe.cmd should equal(s"manwe annotate-vcf ${vcf.getAbsolutePath} -u -q /uri/1&&/uri/2 --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweDataSourcesAnnotate = {
+    val manwe = new ManweDataSourcesAnnotate(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.uri = Some("/uri/1")
+    manwe.cmd should equal(s"manwe data-sources annotate /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.queries = List("/uri/1&&/uri/2")
+    manwe.cmd should equal(s"manwe data-sources annotate /uri/1 -q /uri/1&&/uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.waitToComplete = true
+    manwe.cmd should equal(s"manwe data-sources annotate /uri/1 -q /uri/1&&/uri/2 --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweDataSourcesDownload = {
+    val manwe = new ManweDataSourcesDownload(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+
+    manwe.output = out
+    manwe.uri = "/uri/1"
+    manwe.cmd should equal(s"manwe data-sources download /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweDataSourcesList = {
+    val manwe = new ManweDataSourcesList(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.cmd should equal(s"manwe data-sources list -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweDataSourcesShow = {
+    val manwe = new ManweDataSourcesShow(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.uri = Some("/uri/1")
+    manwe.cmd should equal(s"manwe data-sources show /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesActivate = {
+    val manwe = new ManweSamplesActivate(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.uri = "/uri/1"
+    manwe.cmd should equal(s"manwe samples activate /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesAdd = {
+    val manwe = new ManweSamplesAdd(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.name = Some("pietje")
+    manwe.cmd should equal(s"manwe samples add pietje -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.group = List("/uri/1", "/uri/2")
+    manwe.cmd should equal(s"manwe samples add pietje -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.poolSize = Some(3)
+    manwe.cmd should equal(s"manwe samples add pietje -s 3 -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesAnnotateVariations = {
+    val manwe = new ManweSamplesAnnotateVariations(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.uri = Some("/uri/1")
+    manwe.cmd should equal(s"manwe samples annotate-variations /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.queries = List("/uri/1&&/uri/2", "/uri/3")
+    manwe.cmd should equal(s"manwe samples annotate-variations /uri/1 -q /uri/1&&/uri/2 -q /uri/3 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesImport = {
+    val manwe = new ManweSamplesImport(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.name = Some("pietje")
+    manwe.cmd should equal(s"manwe samples import pietje -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.group = List("/uri/1&&/uri/2", "/uri/3")
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    val vcfs: List[File] = (0 until 4).map(_ => File.createTempFile("manwe", "test")).toList
+    val beds: List[File] = (0 until 4).map(_ => File.createTempFile("manwe", "test")).toList
+    vcfs.foreach(x => x.deleteOnExit())
+    beds.foreach(x => x.deleteOnExit())
+    manwe.vcfs = vcfs
+    manwe.beds = beds
+
+    val vcfLine = vcfs.foldLeft("")((r, f) => r + s"--vcf ${f.getAbsolutePath} ").trim // "--vcf <path>" for every file, separated by single spaces
+    val bedLine = beds.foldLeft("")((r, f) => r + s"--bed ${f.getAbsolutePath} ").trim // same for "--bed <path>"
+
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.poolSize = Some(4)
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.alreadyUploaded = true
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.public = true
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.preferLikelihood = true
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -l -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.noCoverage = true
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -l --no-coverage-profile -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.waitToComplete = true
+    manwe.cmd should equal(s"manwe samples import pietje -g /uri/1&&/uri/2 -g /uri/3 $vcfLine $bedLine -s 4 -u -p -l --no-coverage-profile --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesImportBed = {
+
+    val manwe = new ManweSamplesImportBed(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+
+    val bed = File.createTempFile("manwe", "test")
+    bed.deleteOnExit()
+    manwe.bed = bed
+
+    manwe.uri = Some("/uri/1")
+    manwe.cmd should equal(s"manwe samples import-bed /uri/1 ${bed.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.alreadyUploaded = true
+    manwe.cmd should equal(s"manwe samples import-bed /uri/1 ${bed.getAbsolutePath} -u -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.waitToComplete = true
+    manwe.cmd should equal(s"manwe samples import-bed /uri/1 ${bed.getAbsolutePath} -u --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesImportVcf = {
+    val manwe = new ManweSamplesImportVcf(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+
+    val vcf = File.createTempFile("manwe", "test")
+    vcf.deleteOnExit()
+    manwe.vcf = vcf
+
+    manwe.uri = Some("/uri/1")
+    manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.alreadyUploaded = true
+    manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -u -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.preferLikelihoods = true
+    manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -u -l -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.waitToComplete = true
+    manwe.cmd should equal(s"manwe samples import-vcf /uri/1 ${vcf.getAbsolutePath} -u -l --wait -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesList = {
+    val manwe = new ManweSamplesList(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+
+    manwe.cmd should equal(s"manwe samples list -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.group = List("/uri/1", "/uri/2")
+    manwe.cmd should equal(s"manwe samples list -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.user = Some("/uri/3")
+    manwe.cmd should equal(s"manwe samples list -u /uri/3 -g /uri/1 -g /uri/2 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+
+    manwe.onlyPublic = true
+    manwe.cmd should equal(s"manwe samples list -u /uri/3 -g /uri/1 -g /uri/2 -p -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test
+  def testManweSamplesShow = {
+    val manwe = new ManweSamplesShow(null) {
+      override def globalConfig = new Config(Map("manwe_config" -> "${manwe.manweConfig.getAbsolutePath}"))
+    }
+
+    val out = File.createTempFile("manwe", "test")
+    out.deleteOnExit()
+    manwe.manweConfig = out.getParentFile
+    manwe.output = out
+    manwe.uri = Some("/uri/1")
+
+    manwe.cmd should equal(s"manwe samples show /uri/1 -c ${manwe.manweConfig.getAbsolutePath} > ${out.getAbsolutePath}")
+  }
+
+  @Test def testConfigCreation = {
+    val manwe = new ManweAnnotateBed(null) {
+      override def globalConfig = new Config(Map(
+        "varda_root" -> "http://127.0.0.1:5000",
+        "varda_token" -> "QWERTYUIOPASDFGHJKLZXCVBNM",
+        "varda_cache_size" -> 25,
+        "varda_buffer_size" -> 200,
+        "varda_task_poll_wait" -> 5,
+        "varda_verify_certificate" -> true)) // boolean flag: expected as "VERIFY_CERTIFICATE = True" below
+    }
+
+    val file: File = manwe.createManweConfig(None)
+    val contents = Source.fromFile(file).getLines().toList
+
+    val supposedContent = List("API_ROOT = 'http://127.0.0.1:5000'",
+      "TOKEN = 'QWERTYUIOPASDFGHJKLZXCVBNM'",
+      "VERIFY_CERTIFICATE = True",
+      "COLLECTION_CACHE_SIZE = 25",
+      "DATA_BUFFER_SIZE = 200",
+      "TASK_POLL_WAIT = 5"
+    )
+
+    supposedContent.sorted should equal(contents.sorted) // both sides sorted: the lines are compared regardless of their order in the file
+
+    val manwe2 = new ManweAnnotateBed(null) {
+      override def globalConfig = new Config(Map(
+        "varda_root" -> "http://127.0.0.1:5000",
+        "varda_token" -> "QWERTYUIOPASDFGHJKLZXCVBNM",
+        "varda_cache_size" -> 25,
+        "varda_buffer_size" -> 200,
+        "varda_task_poll_wait" -> 5,
+        "varda_verify_certificate" -> "/a/b/c/d.crt")) // a path instead of a boolean: expected as a quoted string below
+    }
+
+    val file2: File = manwe2.createManweConfig(None)
+    val contents2 = Source.fromFile(file2).getLines().toList
+    val supposedContent2 = List("API_ROOT = 'http://127.0.0.1:5000'",
+      "TOKEN = 'QWERTYUIOPASDFGHJKLZXCVBNM'",
+      "VERIFY_CERTIFICATE = '/a/b/c/d.crt'",
+      "COLLECTION_CACHE_SIZE = 25",
+      "DATA_BUFFER_SIZE = 200",
+      "TASK_POLL_WAIT = 5"
+    )
+
+    supposedContent2.sorted should equal(contents2.sorted)
+  }
+
+}
diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3ad955d35f0768dcc1a056c999778cb8c6b2e205
--- /dev/null
+++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/GvcfToBed.scala
@@ -0,0 +1,41 @@
+package nl.lumc.sasc.biopet.extensions.tools
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.core.ToolCommandFunction
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input }
+
+/**
+ * Created by ahbbollen on 13-10-15.
+ */
+class GvcfToBed(val root: Configurable) extends ToolCommandFunction {
+  def toolObject = nl.lumc.sasc.biopet.tools.GvcfToBed // the tool object this wrapper refers to
+
+  @Input(doc = "input vcf")
+  var inputVcf: File = _
+
+  @Output(doc = "output bed")
+  var outputBed: File = _
+
+  @Argument(doc = "sample", required = false)
+  var sample: Option[String] = None // passed as -S only when set
+
+  @Argument(doc = "minquality", required = false)
+  var minQuality: Int = 0 // passed as --minGenomeQuality
+
+  @Argument(doc = "inverse", required = false)
+  var inverse: Boolean = false
+
+  override def defaultCoreMemory = 4.0
+
+  override def cmdLine = {
+    super.cmdLine +
+      required("-I", inputVcf) +
+      required("-O", outputBed) +
+      optional("-S", sample) +
+      optional("--minGenomeQuality", minQuality) +
+      conditional(inverse, "--inverted") // NOTE(review): the OptParser of tools.GvcfToBed in this patch defines --invertedOutputBed <file> but no --inverted flag; confirm the tool accepts this
+  }
+
+}
diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala
index df131db588a2709d238213968c18aaa86ffb3962..fd43743c2e249d9f34ab012a1dc941f6736b295d 100644
--- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala
+++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala
@@ -37,6 +37,7 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction {
   var minTotalDepth: Option[Int] = config("min_total_depth")
   var minAlternateDepth: Option[Int] = config("min_alternate_depth")
   var minSamplesPass: Option[Int] = config("min_samples_pass")
+  var minGenomeQuality: Option[Int] = config("min_genome_quality") // read from the "min_genome_quality" config key, passed as --minGenomeQuality when set
   var filterRefCalls: Boolean = config("filter_ref_calls", default = false)
 
   override def defaultCoreMemory = 3.0
@@ -53,5 +54,6 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction {
       optional("--minTotalDepth", minTotalDepth) +
       optional("--minAlternateDepth", minAlternateDepth) +
       optional("--minSamplesPass", minSamplesPass) +
+      optional("--minGenomeQuality",
minGenomeQuality) +
       conditional(filterRefCalls, "--filterRefCalls")
   }
diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f96835488102e2a7c7a43d153a5986faa5e158f7
--- /dev/null
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/GvcfToBed.scala
@@ -0,0 +1,123 @@
+package nl.lumc.sasc.biopet.tools
+
+import java.io.{ File, PrintWriter }
+
+import htsjdk.variant.variantcontext.VariantContext
+import htsjdk.variant.vcf.VCFFileReader
+import nl.lumc.sasc.biopet.utils.{ VcfUtils, ToolCommand }
+import nl.lumc.sasc.biopet.utils.intervals.BedRecord
+
+import scala.collection.JavaConversions._
+
+/**
+ * Created by ahbbollen on 13-10-15.
+ * Create bed track from genome quality values in (g)VCF
+ *
+ * Adjacent records on one contig with the same pass/fail state are merged into a single bed region.
+ * Passing regions go to the output bed, failing regions to the optional inverted output bed.
+ */
+object GvcfToBed extends ToolCommand {
+
+  /** Command line arguments as filled in by [[OptParser]] */
+  case class Args(inputVcf: File = null,
+                  outputBed: File = null,
+                  invertedOutputBed: Option[File] = None,
+                  sample: Option[String] = None,
+                  minGenomeQuality: Int = 0,
+                  inverse: Boolean = false) extends AbstractArgs
+
+  /** Command line parser of this tool */
+  class OptParser extends AbstractOptParser {
+    opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
+      c.copy(inputVcf = x)
+    } text "Input vcf file"
+    opt[File]('O', "outputBed") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
+      c.copy(outputBed = x)
+    } text "Output bed file"
+    opt[File]("invertedOutputBed") maxOccurs 1 valueName "<file>" action { (x, c) =>
+      c.copy(invertedOutputBed = Some(x))
+    } text "Output bed file for the regions that do not reach the minimum genome quality"
+    opt[String]('S', "sample") unbounded () maxOccurs 1 valueName "<sample>" action { (x, c) =>
+      c.copy(sample = Some(x))
+    } text "Sample to consider. Will take first sample on alphabetical order by default"
+    opt[Int]("minGenomeQuality") unbounded () maxOccurs 1 valueName "<int>" action { (x, c) =>
+      c.copy(minGenomeQuality = x)
+    } text "Minimum genome quality to consider"
+  }
+
+  /**
+   * Writes the bed track(s) for the vcf file given on the command line
+   * @param args the command line arguments
+   */
+  def main(args: Array[String]): Unit = {
+    val argsParser = new OptParser
+    val cmdArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
+
+    logger.debug("Opening reader")
+    val reader = new VCFFileReader(cmdArgs.inputVcf, false)
+    logger.debug("Opening writer")
+    val writer = new PrintWriter(cmdArgs.outputBed)
+    val invertedWriter = cmdArgs.invertedOutputBed.map { file =>
+      logger.debug("Opening inverted writer")
+      new PrintWriter(file)
+    }
+
+    // try/finally: the reader and the writers are closed also when processing fails halfway
+    try {
+      val sample = cmdArgs.sample.getOrElse(reader.getFileHeader.getSampleNamesInOrder.head)
+
+      val it = reader.iterator()
+      // a vcf without records gives empty bed file(s) instead of a NoSuchElementException
+      if (it.hasNext) {
+        val firstRecord = it.next()
+        var contig = firstRecord.getContig
+        var start = firstRecord.getStart
+        var end = firstRecord.getEnd
+        var pass = VcfUtils.hasMinGenomeQuality(firstRecord, sample, cmdArgs.minGenomeQuality)
+
+        def writeResetCachedRecord(newRecord: VariantContext): Unit = {
+          writeCachedRecord()
+          contig = newRecord.getContig
+          start = newRecord.getStart
+          end = newRecord.getEnd
+          pass = VcfUtils.hasMinGenomeQuality(newRecord, sample, cmdArgs.minGenomeQuality)
+        }
+
+        def writeCachedRecord(): Unit = {
+          if (pass) writer.println(new BedRecord(contig, start - 1, end))
+          else invertedWriter.foreach(_.println(new BedRecord(contig, start - 1, end)))
+        }
+
+        var counter = 1
+        logger.info("Start")
+        for (r <- it) {
+          if (contig == r.getContig) {
+            val p = VcfUtils.hasMinGenomeQuality(r, sample, cmdArgs.minGenomeQuality)
+            // a change of pass/fail state or a gap after the cached region starts a new region
+            if (p != pass || r.getStart > (end + 1)) writeResetCachedRecord(r)
+            else end = r.getEnd
+          } else writeResetCachedRecord(r)
+
+          counter += 1
+          if (counter % 100000 == 0) {
+            logger.info(s"Processed $counter records")
+          }
+        }
+        writeCachedRecord()
+
+        logger.info(s"Processed $counter records")
+      } else logger.info("Processed 0 records")
+    } finally {
+      logger.debug("Closing writer")
+      writer.close()
+      invertedWriter.foreach { w =>
+        logger.debug("Closing inverted writer")
+        w.close()
+      }
+      logger.debug("Closing reader")
+      reader.close()
+    }
+
+    logger.info("Finished!")
+  }
+}
diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala
index 2799fa38b78b6d2acb123b80d5ae5a3bdde3bf3a..c72222d9fc3278f396c7bdd7b5dff645cf4f2620 100644
--- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala
@@ -54,7 +54,8 @@ object VcfFilter extends ToolCommand {
                   filterHetVarToHomVar: List[(String, String)] = Nil,
                   filterRefCalls: Boolean = false,
                   filterNoCalls: Boolean = false,
-                  iDset: Set[String] = Set()) extends AbstractArgs
+                  iDset: Set[String] = Set(),
+                  minGenomeQuality: Int = 0) extends AbstractArgs
 
   class OptParser extends AbstractOptParser {
     opt[File]('I', "inputVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
@@ -128,6 +129,9 @@ object VcfFilter extends ToolCommand {
     opt[File]("idFile") unbounded () action { (x, c) =>
       c.copy(iDset = c.iDset ++ Source.fromFile(x).getLines())
     } text "File that contain list of IDs to get from vcf file"
+    opt[Int]("minGenomeQuality") unbounded () action { (x, c) =>
+      c.copy(minGenomeQuality = x)
+    } text "Minimum genome quality (GQ) a sample must have to pass"
   }
 
   /** @param args the command line arguments */
@@ -161,6 +165,7 @@ object VcfFilter extends ToolCommand {
         hasMinTotalDepth(record, cmdArgs.minTotalDepth) &&
         hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) &&
         minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) &&
+        minGenomeQuality(record, cmdArgs.minGenomeQuality, cmdArgs.minSamplesPass) &&
         (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) &&
         calledIn(record, cmdArgs.calledIn) &&
         hasGenotype(record, cmdArgs.mustHaveGenotype) &&
@@ -263,6 +268,18 @@ object VcfFilter
extends ToolCommand {
     }) >= minSamplesPass
   }
 
+  /**
+   * Checks whether enough samples reach the minimum genome quality (GQ)
+   * @param record VCF record
+   * @param minGQ smallest GQ allowed; 0 accepts every genotype, also those without a GQ field
+   * @param minSamplesPass number of samples that must pass
+   * @return true when at least minSamplesPass genotypes pass
+   */
+  def minGenomeQuality(record: VariantContext, minGQ: Int, minSamplesPass: Int = 1): Boolean = {
+    // minGQ == 0 is the tool default (no filter asked for): genotypes lacking GQ must not fail the record then
+    record.getGenotypes.count(x => minGQ == 0 || (x.hasGQ && x.getGQ >= minGQ)) >= minSamplesPass
+  }
+
   /**
    * Checks if given samples does have a variant hin this record
    * @param record VCF record
diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5664c9c6fe306a3d845fd1cbe1bf9ded22f153f3
--- /dev/null
+++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/GvcfToBedTest.scala
@@ -0,0 +1,42 @@
+package nl.lumc.sasc.biopet.tools
+
+import java.io.File
+import java.nio.file.Paths
+
+import htsjdk.variant.vcf.VCFFileReader
+import nl.lumc.sasc.biopet.utils.VcfUtils
+import org.scalatest.Matchers
+import org.scalatest.mock.MockitoSugar
+import org.scalatest.testng.TestNGSuite
+
+import GvcfToBed._
+import org.testng.annotations.Test
+
+/**
+ * Created by ahbbollen on 13-10-15.
+ */
+class GvcfToBedTest extends TestNGSuite with Matchers with MockitoSugar {
+
+  private def resourcePath(p: String): String = { // file system path of a resource on the test classpath
+    Paths.get(getClass.getResource(p).toURI).toString
+  }
+
+  val vcf3 = new File(resourcePath("/VCFv3.vcf")) // not used by the test in this class
+  val vepped = new File(resourcePath("/VEP_oneline.vcf"))
+  val unvepped = new File(resourcePath("/unvepped.vcf"))
+
+  val vepped_path = resourcePath("/VEP_oneline.vcf") // not used by the test in this class
+
+  @Test def testMinQuality = {
+    val reader = new VCFFileReader(vepped, false) // NOTE(review): this reader is never closed
+    val record = reader.iterator().next() // only the first record of the file is checked
+
+    VcfUtils.hasMinGenomeQuality(record, "Sample_101", 99) shouldBe true
+
+    val reader2 = new VCFFileReader(unvepped, false) // NOTE(review): this reader is never closed
+    val record2 = reader2.iterator.next()
+
+    VcfUtils.hasMinGenomeQuality(record2, "Sample_102", 3) shouldBe true
+    VcfUtils.hasMinGenomeQuality(record2, "Sample_102", 99) shouldBe false
+  }
+}
diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala
index 7149ab194d03e1afd40ea99b39b3b39674b2533d..4b3796ee4a3da1fe7bf9635b6d65914b812d640f 100644
--- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala
+++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala
@@ -144,6 +144,15 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers {
     minAlternateDepth(record, 20, 2) shouldBe false
   }
 
+  @Test def testHasMinGQ() = {
+    val reader = new VCFFileReader(vepped, false)
+    val record = reader.iterator().next()
+
+    minGenomeQuality(record, 99, 1) shouldBe true
+    minGenomeQuality(record, 99, 2) shouldBe true
+    minGenomeQuality(record, 99, 3) shouldBe true // presumably at least three samples in the fixture have GQ >= 99 - TODO confirm against VEP_oneline.vcf
+  }
+
   @Test def testMustHaveVariant() = {
     val reader = new VCFFileReader(vepped, false)
     val record = reader.iterator().next()
diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala
index 8e375f4e7e35cbb49c9cc90c688753b2b6ca42ea..e724575ac08dd288e9ff5ae08237bdb0cb208657 100644
--- a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala
+++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/VcfUtils.scala
@@ -15,9 +15,11 @@
  */
 package nl.lumc.sasc.biopet.utils
 
+import java.io.File
 import java.util
 
-import htsjdk.variant.variantcontext.VariantContext
+import htsjdk.variant.variantcontext.{ Genotype, VariantContext }
+import htsjdk.variant.vcf.{ VCFFileReader, VCFHeader, VCFFilterHeaderLine }
 
 import scala.collection.JavaConversions._
 
@@ -80,4 +82,50 @@ object VcfUtils {
       var1.getEnd == var2.getEnd &&
       var1.getAttributes == var2.getAttributes
   }
+
+  /**
+   * Return true if header is a block-type GVCF file
+   * @param header header of Vcf file
+   * @return true when the header has a "GVCFBlock" meta data line
+   */
+  def isBlockGVcf(header: VCFHeader): Boolean = {
+    header.getMetaDataLine("GVCFBlock") != null
+  }
+
+  /**
+   * Get sample IDs from vcf File
+   * @param vcf File object pointing to vcf
+   * @return list of strings with sample IDs
+   */
+  def getSampleIds(vcf: File): List[String] = {
+    val reader = new VCFFileReader(vcf, false)
+    // toList copies the names before the reader is closed; finally closes it also when reading fails
+    try reader.getFileHeader.getSampleNamesInOrder.toList
+    finally reader.close()
+  }
+
+  /**
+   * Check whether record has minimum genome Quality
+   * @param record variant context
+   * @param sample sample name
+   * @param minGQ minimum genome quality value
+   * @return true when the genotype of the sample has a GQ of at least minGQ
+   * @throws IllegalArgumentException when the sample is not in the record
+   */
+  def hasMinGenomeQuality(record: VariantContext, sample: String, minGQ: Int): Boolean = {
+    if (!record.getSampleNamesOrderedByName.contains(sample))
+      throw new IllegalArgumentException("Sample does not exist")
+    val gt = record.getGenotype(sample)
+    hasMinGenomeQuality(gt, minGQ)
+  }
+
+  /**
+   * Check whether genotype has minimum genome Quality
+   * @param gt Genotype
+   * @param minGQ minimum genome quality value
+   * @return true when the genotype has a GQ field with a value of at least minGQ
+   */
+  def hasMinGenomeQuality(gt: Genotype, minGQ: Int): Boolean = {
+    gt.hasGQ && gt.getGQ >= minGQ
+  }
 }
diff --git
a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala index 409e027dc5165e27db853f50c2f9f49b90ee3970..fbb095945778b9dcb727cccf668f50184990e161 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala @@ -19,16 +19,14 @@ import java.io.File import nl.lumc.sasc.biopet.FullVersion import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.core.summary._ import nl.lumc.sasc.biopet.extensions.picard.{ MergeSamFiles, SortSam } import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView import nl.lumc.sasc.biopet.extensions.tools.{ MergeTables, WipeReads } import nl.lumc.sasc.biopet.extensions.{ HtseqCount, Ln } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig import nl.lumc.sasc.biopet.pipelines.gentrap.extensions.{ CustomVarScan, Pdflatex, RawBaseCounter } import nl.lumc.sasc.biopet.pipelines.gentrap.scripts.{ AggrBaseCount, PdfReportTemplateWriter, PlotHeatmap } -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping +import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait import nl.lumc.sasc.biopet.utils.Logging import nl.lumc.sasc.biopet.utils.config._ import org.broadinstitute.gatk.queue.QScript @@ -46,9 +44,7 @@ import scalaz.Scalaz._ * @author Wibowo Arindrarto <w.arindrarto@lumc.nl> */ class Gentrap(val root: Configurable) extends QScript - with MultiSampleQScript - with SummaryQScript - with Reference { qscript => + with MultisampleMappingTrait { qscript => import Gentrap.ExpMeasures._ import Gentrap.StrandProtocol._ @@ -102,24 +98,25 @@ class Gentrap(val root: Configurable) extends QScript /** Whether to do simple variant calling on RNA or not */ var callVariants: Boolean = config("call_variants", default = false) - /** Settings for all Picard 
CollectRnaSeqMetrics runs */ - private def collectRnaSeqMetricsSettings: Map[String, String] = Map( - "strand_specificity" -> (strandProtocol match { - case NonSpecific => StrandSpecificity.NONE.toString - case Dutp => StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString - case otherwise => throw new IllegalStateException(otherwise.toString) - })) ++ (ribosomalRefFlat match { - case Some(rbs) => Map("ribosomal_intervals" -> rbs.toString) - case None => Map() - }) - /** Default pipeline config */ override def defaults = Map( + "merge_strategy" -> "preprocessmergesam", "gsnap" -> Map( "novelsplicing" -> 1, "batch" -> 4, "format" -> "sam" ), + "bammetrics" -> Map( + "transcript_refflat" -> annotationRefFlat, + "collectrnaseqmetrics" -> ((if (strandProtocol != null) Map( + "strand_specificity" -> (strandProtocol match { + case NonSpecific => StrandSpecificity.NONE.toString + case Dutp => StrandSpecificity.SECOND_READ_TRANSCRIPTION_STRAND.toString + case otherwise => throw new IllegalStateException(otherwise.toString) + }) + ) + else Map()) ++ (if (ribosomalRefFlat != null) ribosomalRefFlat.map("ribosomal_intervals" -> _.getAbsolutePath).toList else Nil)) + ), "cutadapt" -> Map("minimum_length" -> 20), // avoid conflicts when merging since the MarkDuplicate tags often cause merges to fail "picard" -> Map( @@ -297,8 +294,7 @@ class Gentrap(val root: Configurable) extends QScript def summaryFile: File = new File(outputDir, "gentrap.summary.json") /** Files that will be listed in the summary file */ - def summaryFiles: Map[String, File] = Map( - "reference_fasta" -> referenceFasta(), + override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map( "annotation_refflat" -> annotationRefFlat ) ++ Map( "annotation_gtf" -> annotationGtf, @@ -312,13 +308,12 @@ class Gentrap(val root: Configurable) extends QScript def summaryStats: Map[String, Any] = Map() /** Pipeline settings shown in the summary file */ - def summarySettings: Map[String, Any] = Map( + 
override def summarySettings: Map[String, Any] = super.summarySettings ++ Map( "aligner" -> aligner, "expression_measures" -> expMeasures.toList.map(_.toString), "strand_protocol" -> strandProtocol.toString, "call_variants" -> callVariants, "remove_ribosomal_reads" -> removeRibosomalReads, - "reference" -> referenceSummary, "version" -> FullVersion ) @@ -340,7 +335,9 @@ class Gentrap(val root: Configurable) extends QScript } /** Steps to run before biopetScript */ - def init(): Unit = { + override def init(): Unit = { + super.init() + // TODO: validate that exons are flattened or not (depending on another option flag?) // validate required annotation files if (expMeasures.contains(FragmentsPerGene) && annotationGtf.isEmpty) @@ -368,13 +365,9 @@ class Gentrap(val root: Configurable) extends QScript if (annotationRefFlat.getName.nonEmpty) inputFiles :+= new InputFile(annotationRefFlat) } - /** Pipeline run for each sample */ - def biopetScript(): Unit = { - addSamplesJobs() - } - /** Pipeline run for multiple samples */ - def addMultiSampleJobs(): Unit = { + override def addMultiSampleJobs(): Unit = { + super.addMultiSampleJobs // merge expression tables mergeTableJobs.values.foreach { case maybeJob => maybeJob.foreach(add(_)) } // add heatmap jobs @@ -384,32 +377,31 @@ class Gentrap(val root: Configurable) extends QScript geneFragmentsCountJob } // TODO: use proper notation - addSummaryJobs() - add(pdfTemplateJob) - add(pdfReportJob) + //add(pdfTemplateJob) + //add(pdfReportJob) } /** Returns a [[Sample]] object */ - def makeSample(sampleId: String): Sample = new Sample(sampleId) + override def makeSample(sampleId: String): Sample = new Sample(sampleId) /** * Gentrap sample * * @param sampleId Unique identifier of the sample */ - class Sample(sampleId: String) extends AbstractSample(sampleId) with CufflinksProducer { + class Sample(sampleId: String) extends super.Sample(sampleId) with CufflinksProducer { /** Shortcut to qscript object */ protected def pipeline: 
Gentrap = qscript /** Summary stats of the sample */ - def summaryStats: Map[String, Any] = Map( + override def summaryStats: Map[String, Any] = super.summaryStats ++ Map( "all_paired" -> allPaired, "all_single" -> allSingle ) /** Summary files of the sample */ - def summaryFiles: Map[String, File] = Map( + override def summaryFiles: Map[String, File] = super.summaryFiles ++ Map( "alignment" -> alnFile ) ++ Map( "gene_fragments_count" -> geneFragmentsCount, @@ -425,13 +417,10 @@ class Gentrap(val root: Configurable) extends QScript "variant_calls" -> variantCalls ).collect { case (key, Some(value)) => key -> value } - /** Per-sample alignment file, pre rRNA cleanup (if chosen) */ - lazy val alnFileDirty: File = sampleAlnJobSet.alnJob.output - /** Per-sample alignment file, post rRNA cleanup (if chosen) */ lazy val alnFile: File = wipeJob match { case Some(j) => j.outputBam - case None => alnFileDirty + case None => preProcessBam.get } /** Read count per gene file */ @@ -698,24 +687,12 @@ class Gentrap(val root: Configurable) extends QScript job } - /** General metrics job, only when library > 1 */ - private lazy val bamMetricsModule: Option[BamMetrics] = (libraries.size > 1) - .option { - val mod = new BamMetrics(qscript) - mod.inputBam = alnFile - mod.outputDir = new File(sampleDir, "metrics") - mod.sampleId = Option(sampleId) - mod.transcriptRefFlatFile = Option(annotationRefFlat) - mod.rnaMetricsSettings = collectRnaSeqMetricsSettings - mod - } - /** Job for removing ribosomal reads */ private def wipeJob: Option[WipeReads] = removeRibosomalReads .option { //require(ribosomalRefFlat.isDefined) val job = new WipeReads(qscript) - job.inputBam = alnFileDirty + job.inputBam = bamFile.get ribosomalRefFlat.foreach(job.intervalFile = _) job.outputBam = createFile(".cleaned.bam") job.discardedBam = createFile(".rrna.bam") @@ -752,33 +729,19 @@ class Gentrap(val root: Configurable) extends QScript } } - /** Job for combining all library BAMs */ - private def 
sampleAlnJobSet: CombineFileJobSet = - makeCombineJob(libraries.values.map(_.alnFile).toList, createFile(".bam")) - /** Whether all libraries are paired or not */ - def allPaired: Boolean = libraries.values.forall(_.paired) + def allPaired: Boolean = libraries.values.forall(_.mapping.forall(_.input_R2.isDefined)) /** Whether all libraries are single or not */ - def allSingle: Boolean = libraries.values.forall(!_.paired) + def allSingle: Boolean = libraries.values.forall(_.mapping.forall(_.input_R2.isEmpty)) // TODO: add warnings or other messages for config values that are hard-coded by the pipeline /** Adds all jobs for the sample */ - def addJobs(): Unit = { + override def addJobs(): Unit = { + super.addJobs() // TODO: this is our requirement since it's easier to calculate base counts when all libraries are either paired or single require(allPaired || allSingle, s"Sample $sampleId contains only single-end or paired-end libraries") - // add per-library jobs - addPerLibJobs() // merge or symlink per-library alignments - sampleAlnJobSet.addAll() - bamMetricsModule match { - case Some(m) => - m.init() - m.biopetScript() - addAll(m.functions) - addSummaryQScript(m) - case None => ; - } // add bigwig output, also per-strand when possible addAll(Bam2Wig(qscript, alnFile).functions) alnFilePlusStrand.collect { case f => addAll(Bam2Wig(qscript, f).functions) } @@ -802,75 +765,6 @@ class Gentrap(val root: Configurable) extends QScript // add variant calling job if requested varCallJob.foreach(add(_)) } - - /** Returns a [[Library]] object */ - def makeLibrary(libId: String): Library = new Library(libId) - - /** - * Gentrap library - * - * @param libId Unique identifier of the library - */ - class Library(libId: String) extends AbstractLibrary(libId) { - - /** Summary stats of the library */ - def summaryStats: Map[String, Any] = Map() - - /** Summary files of the library */ - def summaryFiles: Map[String, File] = Map( - "alignment" -> mappingJob.outputFiles("finalBamFile") 
- ) - - /** Convenience method to check whether the library is paired or not */ - def paired: Boolean = config.contains("R2") - - /** Alignment results of this library ~ can only be accessed after addJobs is run! */ - def alnFile: File = mappingJob.outputFiles("finalBamFile") - - /** Wiggle track job */ - private lazy val bam2wigModule: Bam2Wig = Bam2Wig(qscript, alnFile) - - /** Per-library mapping job */ - def mappingJob: Mapping = { - val job = new Mapping(qscript) - job.sampleId = Option(sampleId) - job.libId = Option(libId) - job.outputDir = libDir - job.input_R1 = config("R1") - job.input_R2 = config("R2") - job.init() - job.biopetScript() - job - } - - /** Library metrics job, since we don't have access to the underlying metrics */ - private lazy val bamMetricsJob: BamMetrics = { - val mod = new BamMetrics(qscript) - mod.inputBam = alnFile - mod.outputDir = new File(libDir, "metrics") - mod.sampleId = Option(sampleId) - mod.libId = Option(libId) - mod.rnaMetricsSettings = collectRnaSeqMetricsSettings - mod.transcriptRefFlatFile = Option(annotationRefFlat) - mod - } - - /** Adds all jobs for the library */ - def addJobs(): Unit = { - // create per-library alignment file - addAll(mappingJob.functions) - // Input file checking - inputFiles :::= mappingJob.inputFiles - // add bigwig track - addAll(bam2wigModule.functions) - qscript.addSummaryQScript(mappingJob) - bamMetricsJob.init() - bamMetricsJob.biopetScript() - addAll(bamMetricsJob.functions) - qscript.addSummaryQScript(bamMetricsJob) - } - - } } } diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala new file mode 100644 index 0000000000000000000000000000000000000000..9ed3b00d01d42e21eec88ac3e3ccb5a03af4203d --- /dev/null +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweActivateAfterAnnotImport.scala @@ -0,0 +1,64 @@ 
+package nl.lumc.sasc.biopet.pipelines.toucan + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesActivate, ManweSamplesImport } +import nl.lumc.sasc.biopet.utils.config.Configurable + +import scala.io.Source + +/** + * Created by ahbbollen on 9-10-15. + * Wrapper for manwe activate after importing and annotating + */ +class ManweActivateAfterAnnotImport(root: Configurable, + annotate: ManweAnnotateVcf, + imported: ManweSamplesImport) extends ManweSamplesActivate(root) { + + override def beforeGraph: Unit = { + super.beforeGraph + require(annotate != null, "Annotate should be defined") + require(imported != null, "Imported should be defined") + this.deps :+= annotate.jobOutputFile + this.deps :+= imported.jobOutputFile + } + + override def beforeCmd: Unit = { + super.beforeCmd + + this.uri = getUri + } + + def getUriFromFile(f: File): String = { + val r = if (f.exists()) { + val reader = Source.fromFile(f) + val it = reader.getLines() + if (it.isEmpty) { + throw new IllegalArgumentException("Empty manwe stderr file") + } + it.filter(_.contains("Added sample")).toList.head.split(" ").last + } else { + "" + } + r + } + + def getUri: String = { + val err: Option[File] = Some(imported.jobOutputFile) + uri = err match { + case None => "" + case Some(s) => s match { + case null => "" + case other => getUriFromFile(other) + } + case _ => "" + } + + uri + } + + override def subCommand = { + required("samples") + required("activate") + getUri + } + +} diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala new file mode 100644 index 0000000000000000000000000000000000000000..e49ff7e5ed4d7ba91fd0bd1ef8b877bf58c3c296 --- /dev/null +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/ManweDownloadAfterAnnotate.scala @@ -0,0 +1,60 @@ +package 
nl.lumc.sasc.biopet.pipelines.toucan + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweDataSourcesDownload } +import nl.lumc.sasc.biopet.utils.config.Configurable + +import scala.io.Source + +/** + * Created by ahbbollen on 9-10-15. + */ +class ManweDownloadAfterAnnotate(root: Configurable, + annotate: ManweAnnotateVcf) extends ManweDataSourcesDownload(root) { + + override def beforeGraph: Unit = { + super.beforeGraph + require(annotate != null, "Annotate should be defined") + this.deps :+= annotate.jobOutputFile + } + + override def beforeCmd: Unit = { + super.beforeCmd + + this.uri = getUri + } + + def getUriFromFile(f: File): String = { + val r = if (f.exists()) { + val reader = Source.fromFile(f) + val it = reader.getLines() + if (it.isEmpty) { + throw new IllegalArgumentException("Empty manwe stderr file") + } + it.filter(_.contains("Annotated VCF file")).toList.head.split(" ").last + } else { + "" + } + r + } + + def getUri: String = { + val err: Option[File] = Some(annotate.jobOutputFile) + uri = err match { + case None => "" + case Some(s) => s match { + case null => "" + case other => getUriFromFile(other) + } + case _ => "" + } + + uri + } + + override def subCommand = { + required("data-sources") + required("download") + getUri + } + +} diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 22d3f2be919b60836db6584a47ff536fb23b5072..763f7e9adaa8578551de713c728118063fba747c 100644 --- a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -15,12 +15,15 @@ */ package nl.lumc.sasc.biopet.pipelines.toucan -import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.core.{ 
BiopetQScript, PipelineCommand, Reference } -import nl.lumc.sasc.biopet.extensions.VariantEffectPredictor -import nl.lumc.sasc.biopet.extensions.tools.{ VcfWithVcf, VepNormalizer } -import nl.lumc.sasc.biopet.utils.ConfigUtils +import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView +import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsMerge } +import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesImport } +import nl.lumc.sasc.biopet.extensions.tools.{ GvcfToBed, VcfWithVcf, VepNormalizer } +import nl.lumc.sasc.biopet.extensions.{ Bgzip, Ln, VariantEffectPredictor } +import nl.lumc.sasc.biopet.utils.VcfUtils +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript /** @@ -34,19 +37,34 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum @Input(doc = "Input VCF file", shortName = "Input", required = true) var inputVCF: File = _ + @Input(doc = "Input GVCF file", shortName = "Input", required = false) + var inputGvcf: Option[File] = None + + var sampleIds: List[String] = Nil def init(): Unit = { inputFiles :+= new InputFile(inputVCF) + sampleIds = root match { + case m: MultiSampleQScript => m.samples.keys.toList + case null => VcfUtils.getSampleIds(inputVCF) + case s: SampleLibraryTag => s.sampleId.toList + case _ => throw new IllegalArgumentException("You don't have any samples") + } } override def defaults = Map( "varianteffectpredictor" -> Map("everything" -> true, "failed" -> 1, "allow_non_variant" -> true) ) - //defaults ++= Map("varianteffectpredictor" -> Map("everything" -> true)) - def biopetScript(): Unit = { + val doVarda: Boolean = config("use_varda", default = false) + val useVcf: File = if (doVarda) { + inputGvcf match { + case Some(s) => varda(inputVCF, s) + case _ => throw new IllegalArgumentException("You have not specified a GVCF file") + } + } else inputVCF val vep = new VariantEffectPredictor(this) - vep.input = inputVCF + 
vep.input = useVcf vep.output = new File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf") vep.isIntermediate = true add(vep) @@ -89,6 +107,112 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum addSummaryJobs() } + /** + * Performs the varda import and activate for one sample + * @param sampleID the sampleID to be used + * @param inputVcf the input VCF + * @param gVCF the gVCF for coverage + * @param annotation: ManweDownloadAnnotateVcf object of annotated vcf + * @return + */ + def importAndActivateSample(sampleID: String, inputVcf: File, + gVCF: File, annotation: ManweAnnotateVcf): ManweActivateAfterAnnotImport = { + + val minGQ: Int = config("minimum_genome_quality", default = 20, submodule = "manwe") + val isPublic: Boolean = config("varda_is_public", default = true, submodule = "manwe") + + val bedTrack = new GvcfToBed(this) + bedTrack.inputVcf = gVCF + bedTrack.outputBed = swapExt(outputDir, gVCF, ".vcf.gz", s""".$sampleID.bed""") + bedTrack.minQuality = minGQ + bedTrack.isIntermediate = true + bedTrack.sample = Some(sampleID) + add(bedTrack) + + val mergedBed = new BedtoolsMerge(this) + mergedBed.input = bedTrack.outputBed + mergedBed.dist = 5 + mergedBed.output = swapExt(outputDir, bedTrack.outputBed, ".bed", ".merged.bed") + add(mergedBed) + + val bgzippedBed = new Bgzip(this) + bgzippedBed.input = List(mergedBed.output) + bgzippedBed.output = swapExt(outputDir, mergedBed.output, ".bed", ".bed.gz") + add(bgzippedBed) + + val singleVcf = new BcftoolsView(this) + singleVcf.input = inputVCF + singleVcf.output = swapExt(outputDir, inputVCF, ".vcf.gz", s""".$sampleID.vcf.gz""") + singleVcf.samples = List(sampleID) + singleVcf.minAC = Some(1) + singleVcf.isIntermediate = true + add(singleVcf) + + val intersected = new BedtoolsIntersect(this) + intersected.input = singleVcf.output + intersected.intersectFile = bgzippedBed.output + intersected.output = swapExt(outputDir, singleVcf.output, ".vcf.gz", 
".intersected.vcf") + add(intersected) + + val bgzippedIntersect = new Bgzip(this) + bgzippedIntersect.input = List(intersected.output) + bgzippedIntersect.output = swapExt(outputDir, intersected.output, ".vcf", ".vcf.gz") + add(bgzippedIntersect) + + val imported = new ManweSamplesImport(this) + imported.vcfs = List(bgzippedIntersect.output) + imported.beds = List(bgzippedBed.output) + imported.name = Some(sampleID) + imported.public = isPublic + imported.waitToComplete = false + imported.isIntermediate = true + imported.output = swapExt(outputDir, intersected.output, ".vcf.gz", ".manwe.import") + add(imported) + + val active = new ManweActivateAfterAnnotImport(this, annotation, imported) + active.output = swapExt(outputDir, imported.output, ".import", ".activated") + add(active) + active + + } + + /** + * Perform varda analysis + * @param vcf input vcf + * @param gVcf The gVCF to be used for coverage calculations + * @return return vcf + */ + def varda(vcf: File, gVcf: File): File = { + + val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), submodule = "manwe") + //TODO: add groups!!! 
Need sample-specific group tags for this + + val annotate = new ManweAnnotateVcf(this) + annotate.vcf = vcf + if (annotationQueries.nonEmpty) { + annotate.queries = annotationQueries + } + annotate.waitToComplete = true + annotate.output = swapExt(outputDir, vcf, ".vcf.gz", ".manwe.annot") + annotate.isIntermediate = true + add(annotate) + + val annotatedVcf = new ManweDownloadAfterAnnotate(this, annotate) + annotatedVcf.output = swapExt(outputDir, annotate.output, ".manwe.annot", "manwe.annot.vcf.gz") + add(annotatedVcf) + + val activates = sampleIds map { x => importAndActivateSample(x, vcf, gVcf, annotate) } + + val finalLn = new Ln(this) + activates.foreach(x => finalLn.deps :+= x.output) + finalLn.input = annotatedVcf.output + finalLn.output = swapExt(outputDir, annotatedVcf.output, "manwe.annot.vcf.gz", ".varda_annotated.vcf.gz") + finalLn.relative = true + add(finalLn) + + finalLn.output + } + def summaryFile = new File(outputDir, "Toucan.summary.json") def summaryFiles = Map()