From 0c4cb9495213a1806f3838d9ca36815c96ec1db9 Mon Sep 17 00:00:00 2001
From: Sander Bollen <a.h.b.bollen@lumc.nl>
Date: Tue, 23 Jun 2015 11:26:45 +0200
Subject: [PATCH] reference module

---
 docs/config.md            | 10 +++++++++-
 docs/general/config.md    |  9 ++++++++-
 docs/pipelines/gentrap.md |  4 ++--
 docs/pipelines/mapping.md |  4 ++--
 docs/pipelines/shiva.md   |  3 ++-
 5 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/docs/config.md b/docs/config.md
index de3342b19..79bf59bed 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -69,10 +69,18 @@ Global setting examples are:
 
 ----
 
+#### References
+Pipelines and tools that use references should now use the reference module. This gives some more fine-grained control over references.
+E.g. pipelines and tools that use a FASTA reference file should now set the value `reference_fasta`.
+Additionally, we can set `reference_name` for the name to be used (e.g. `hg19`). If unset, Biopet will default to `unknown`.
+It is also possible to set the `species` flag. Again, we will default to `unknown` if unset.
+
 #### Example settings config
 ~~~
 {
-        "reference": "/references/hg19_nohap/ucsc.hg19_nohap.fasta",
+        "reference_fasta": "/references/hg19_nohap/ucsc.hg19_nohap.fasta",
+        "reference_name": "hg19_nohap",
+        "species": "homo_sapiens",
         "dbsnp": "/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf",
         "joint_variantcalling": false,
         "haplotypecaller": { "scattercount": 100 },
diff --git a/docs/general/config.md b/docs/general/config.md
index 10d69a269..b08d11bf6 100644
--- a/docs/general/config.md
+++ b/docs/general/config.md
@@ -69,10 +69,17 @@ Global setting examples are:
 
 ----
 
+#### References
+Pipelines and tools that use references should now use the reference module. This gives some more fine-grained control over references.
+E.g. pipelines and tools that use a FASTA reference file should now set the value `reference_fasta`.
+Additionally, we can set `reference_name` for the name to be used (e.g. `hg19`). If unset, Biopet will default to `unknown`.
+It is also possible to set the `species` flag. Again, we will default to `unknown` if unset.
 #### Example settings config
 ~~~
 {
-        "reference": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/ucsc.hg19_nohap.fasta",
+        "reference_fasta": "/references/hg19_nohap/ucsc.hg19_nohap.fasta",
+        "reference_name": "hg19_nohap",
+        "species": "homo_sapiens",
         "dbsnp": "/data/LGTC/projects/vandoorn-melanoma/data/references/hg19_nohap/dbsnp_137.hg19_nohap.vcf",
         "joint_variantcalling": false,
         "haplotypecaller": { "scattercount": 100 },
diff --git a/docs/pipelines/gentrap.md b/docs/pipelines/gentrap.md
index cfb99916c..0c73201fb 100644
--- a/docs/pipelines/gentrap.md
+++ b/docs/pipelines/gentrap.md
@@ -78,7 +78,7 @@ For the pipeline settings, there are some values that you need to specify while
 
 1. `output_dir`: path to output directory (if it does not exist, Gentrap will create it for you).
 2. `aligner`: which aligner to use (`gsnap` or `tophat`)
-3. `reference`: this must point to a reference FASTA file and in the same directory, there must be a `.dict` file of the FASTA file.
+3. `reference_fasta`: this must point to a reference FASTA file and in the same directory, there must be a `.dict` file of the FASTA file.
 4. `expression_measures`: this entry determines which expression measurement modes Gentrap will do. You can choose zero or more from the following: `fragments_per_gene`, `bases_per_gene`, `bases_per_exon`, `cufflinks_strict`, `cufflinks_guided`, and/or `cufflinks_blind`. If you only wish to align, you can set the value as an empty list (`[]`).
 5. `strand_protocol`: this determines whether your library is prepared with a specific stranded protocol or not. There are two protocols currently supported now: `dutp` for dUTP-based protocols and `non_specific` for non-strand-specific protocols.
 6. `annotation_refflat`: contains the path to an annotation refFlat file of the entire genome
@@ -100,7 +100,7 @@ Thus, an example settings configuration is as follows:
   "output_dir": "/path/to/output/dir",
   "expression_measures": ["fragments_per_gene", "bases_per_gene"],
   "strand_protocol": "dutp",
-  "reference": "/path/to/reference",
+  "reference_fasta": "/path/to/reference",
   "annotation_gtf": "/path/to/gtf",
   "annotation_refflat": "/path/to/refflat",
   "gsnap": {
diff --git a/docs/pipelines/mapping.md b/docs/pipelines/mapping.md
index 9365d77ac..5fae9a6a6 100644
--- a/docs/pipelines/mapping.md
+++ b/docs/pipelines/mapping.md
@@ -42,7 +42,7 @@ All other values should be provided in the config. Specific config values toward
 | skip_flexiprep | Boolean (optional) | Whether to skip the flexiprep QC step (default = False) |
 | skip_markduplicates | Boolean (optional) | Whether to skip the Picard Markduplicates step (default = False) |
 | skip_metrics | Boolean (optional) | Whether to skip the metrics gathering step (default = False) |
-| reference | Path (**required**) | Path to indexed fasta file to be used as reference |
+| reference_fasta | Path (**required**) | Path to indexed fasta file to be used as reference |
 | platform | String (optional) | Read group Platform (defaults to `illumina`)|
 | platform_unit | String (**required**) | Read group platform unit |
 | readgroup_sequencing_center | String (**required**) | Read group sequencing center |
@@ -60,7 +60,7 @@ Any supplied sample config will be ignored.
 ### Example config
 ```json
 {
-"reference": "<path/to/reference">,
+"reference_fasta": "<path/to/reference>",
 "aligner": "bwa",
 "skip_metrics": true,
 "platform": "our_platform",
diff --git a/docs/pipelines/shiva.md b/docs/pipelines/shiva.md
index 7fcef405d..9f3b0076f 100644
--- a/docs/pipelines/shiva.md
+++ b/docs/pipelines/shiva.md
@@ -105,7 +105,7 @@ To view all possible config options please navigate to our Gitlab wiki page
 
 | Namespace | Name |  Type | Default | Function |
 | ----------- | ---- | ----- | ------- | -------- |
-| shiva | reference | String |  | reference to align to |
+| shiva | reference_fasta | String |  | reference to align to |
 | shiva | dbsnp | String |  | vcf file of dbsnp records |
 | shiva | variantcallers | List[String] |  | variantcaller to use, see list |
 | shiva | use_indel_realigner | Boolean | true | Realign indels |
@@ -117,6 +117,7 @@ To view all possible config options please navigate to our Gitlab wiki page
 | vcffilter | min_alternate_depth | Integer | 2 | Filter variants with at least x depth on the alternate allele |
 | vcffilter | min_samples_pass | Integer | 1 | Minimum amount of samples which pass custom filter (requires additional flags) |
 | vcffilter | filter_ref_calls | Boolean | true | Remove reference calls |
+| vcfstats | reference | String |  | Path to reference to be used by `vcfstats` |
 
 Since Shiva uses the [Mapping](../mapping.md) pipeline internally, mapping config values can be specified as well.
 For all the options, please see the corresponding documentation for the mapping pipeline.
-- 
GitLab