diff --git a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala index 50c15dbabc3b51be862c5a3987704ff10a57106d..20b4ae8422936bfc5a8b3b8310074b01b46445a6 100644 --- a/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala +++ b/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/VariantEffectPredictor.scala @@ -156,9 +156,9 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu override def beforeGraph(): Unit = { super.beforeGraph() if (!cache && !database) { - Logging.addError("Must supply either cache or database for VariantEffectPredictor") + Logging.addError("Must either set 'cache' or 'database' to true for VariantEffectPredictor") } else if (cache && dir.isEmpty) { - Logging.addError("Must supply dir to cache for VariantEffectPredictor") + Logging.addError("Must supply 'dir_cache' to cache for VariantEffectPredictor") } if (statsText) _summary = new File(output.getAbsolutePath + "_summary.txt") } diff --git a/docs/pipelines/toucan.md b/docs/pipelines/toucan.md index 12f4ea108c54402f51d37e6f673d97e633058d6f..5f362bf13fa083d571f57e03f95351fc1231a16b 100644 --- a/docs/pipelines/toucan.md +++ b/docs/pipelines/toucan.md @@ -83,6 +83,25 @@ The following config values are optional: Annotation queries can be set by the `annotation_queries` config value in the `manwe` config namespace. By default, a global query is returned. + +### Groups +In case you want to add your samples to a specific group in your varda database, you can use the tagging system in your sample config. +Specifically, the `varda_group` tag should be a list of strings, each naming a group. + +E.g. 
: + +```json +{ + "samples": { + "sample1": { + "tags": { + "varda_group": ["group1", "group2"] + } + } + } +} +``` + Running the pipeline --------------- The command to run the pipeline is: diff --git a/toucan/pom.xml b/toucan/pom.xml index 781e458c31cc8128843b55873781e3aaa9f8b1e0..62a9699f8f63680d1c8b6bce67a7390907ca8b34 100644 --- a/toucan/pom.xml +++ b/toucan/pom.xml @@ -43,5 +43,17 @@ <artifactId>BiopetToolsExtensions</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.testng</groupId> + <artifactId>testng</artifactId> + <version>6.8</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.10</artifactId> + <version>2.2.1</version> + <scope>test</scope> + </dependency> </dependencies> </project> diff --git a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 2e3ffe26a1735dbea0f664bf8c4c957bd7262f7b..58dcaf82447daef751eae0d21ac7c43387026149 100644 --- a/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -40,15 +40,17 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum @Input(doc = "Input GVCF file", shortName = "gvcf", required = false) var inputGvcf: Option[File] = None - var sampleIds: List[String] = Nil + var outputVcf: Option[File] = None + + def sampleInfo: Map[String, Map[String, Any]] = root match { + case m: MultiSampleQScript => m.samples.map { case (sampleId, sample) => sampleId -> sample.sampleTags } + case null => VcfUtils.getSampleIds(inputVCF).map(x => x -> Map[String, Any]()).toMap + case s: SampleLibraryTag => s.sampleId.map(x => x -> Map[String, Any]()).toMap + case _ => throw new IllegalArgumentException("") + } + def init(): Unit = { inputFiles :+= new InputFile(inputVCF) - sampleIds = root match { - 
case m: MultiSampleQScript => m.samples.keys.toList - case null => VcfUtils.getSampleIds(inputVCF) - case s: SampleLibraryTag => s.sampleId.toList - case _ => throw new IllegalArgumentException("You don't have any samples") - } } override def defaults = Map( @@ -79,29 +81,29 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum val gonlVcfFile: Option[File] = config("gonl_vcf") val exacVcfFile: Option[File] = config("exac_vcf") - var outputFile = normalizer.outputVcf + outputVcf = Some(normalizer.outputVcf) gonlVcfFile match { case Some(gonlFile) => val vcfWithVcf = new VcfWithVcf(this) - vcfWithVcf.input = outputFile + vcfWithVcf.input = outputVcf.getOrElse(new File("")) vcfWithVcf.secondaryVcf = gonlFile vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_gonl", None) add(vcfWithVcf) - outputFile = vcfWithVcf.output + outputVcf = Some(vcfWithVcf.output) case _ => } exacVcfFile match { case Some(exacFile) => val vcfWithVcf = new VcfWithVcf(this) - vcfWithVcf.input = outputFile + vcfWithVcf.input = outputVcf.getOrElse(new File("")) vcfWithVcf.secondaryVcf = exacFile - vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz") + vcfWithVcf.output = swapExt(outputDir, outputVcf.getOrElse(new File("")), ".vcf.gz", ".exac.vcf.gz") vcfWithVcf.fields ::= ("AF", "AF_exac", None) add(vcfWithVcf) - outputFile = vcfWithVcf.output + outputVcf = Some(vcfWithVcf.output) case _ => } @@ -116,7 +118,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum * @param annotation: ManweDownloadAnnotateVcf object of annotated vcf * @return */ - def importAndActivateSample(sampleID: String, inputVcf: File, + def importAndActivateSample(sampleID: String, sampleGroups: List[String], inputVcf: File, gVCF: File, annotation: ManweAnnotateVcf): ManweActivateAfterAnnotImport = { val minGQ: Int = config("minimum_genome_quality", default = 20, 
namespace = "manwe") @@ -165,6 +167,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum imported.beds = List(bgzippedBed.output) imported.name = Some(sampleID) imported.public = isPublic + imported.group = sampleGroups imported.waitToComplete = false imported.isIntermediate = true imported.output = swapExt(outputDir, intersected.output, ".vcf.gz", ".manwe.import") @@ -186,7 +189,6 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum def varda(vcf: File, gVcf: File): File = { val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), namespace = "manwe") - //TODO: add groups!!! Need sample-specific group tags for this val annotate = new ManweAnnotateVcf(this) annotate.vcf = vcf @@ -202,7 +204,14 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum annotatedVcf.output = swapExt(outputDir, annotate.output, ".manwe.annot", "manwe.annot.vcf.gz") add(annotatedVcf) - val activates = sampleIds map { x => importAndActivateSample(x, vcf, gVcf, annotate) } + val activates = sampleInfo map { x => + val sampleGroup = x._2.getOrElse("varda_group", Nil) match { + case x: List[String] => x + case Nil => Nil + case _ => throw new IllegalArgumentException("Sample tag 'varda_group' is not a list of strings") + } + importAndActivateSample(x._1, sampleGroup, vcf, gVcf, annotate) + } val finalLn = new Ln(this) activates.foreach(x => finalLn.deps :+= x.output)