Commit d82c4932 authored by van den Berg
Browse files

Rename bedfile to targetsfile

Hutspot now supports two bed files to calculate coverage. One is the
`targetsfile`, which was called `bedfile` before, and holds the targets
of the capture kit. The other one is `baitsfile`, which holds the bait
locations of the capture kit.

It is possible to specify only the `targetsfile`, but if you specify the
`baitsfile`, the `targetsfile` must be specified as well, since both are
required by picard HsMetrics.

Also added a test for invalid configuration files, and shortened the
jsonschema validation error to only show the human-readable message.
parent ca0c28db
......@@ -110,10 +110,11 @@ The following reference files **must** be provided in the configuration:
The following reference files **may** be provided:
1. `bedfile`: A bedfile to calculate coverage over the specified regions.
2. `refflat`: A refFlat file to calculate coverage over transcripts.
3. `scatter_size`: Size of the chunks to split the variant calling into.
4. `female_threshold`: Fraction of reads between X and the autosomes to call as
1. `targetsfile`: Bed file of the targets of the capture kit. Used to calculate coverage.
2. `baitsfile`: Bed file of the baits of the capture kit. Used to calculate picard HsMetrics.
3. `refflat`: A refFlat file to calculate coverage over transcripts.
4. `scatter_size`: Size of the chunks to split the variant calling into.
5. `female_threshold`: Fraction of reads between X and the autosomes to call as
female.
......@@ -152,7 +153,8 @@ The following configuration options are **optional**:
| configuration | description |
| ------------- | ----------- |
| `bed` | Comma-separated list of paths to BED files of interest |
| `targetsfile` | Bed file of the targets of the capture kit. Used to calculate coverage |
| `baitsfile` | Bed file of the baits of the capture kit. Used to calculate picard HsMetrics |
| `female_threshold` | Float between 0 and 1 that signifies the threshold of
the ratio between coverage on X/overall coverage that 'calls' a sample as
female. Default = 0.6 |
......
# hutspot - a DNAseq variant calling pipeline
# Copyright (C) 2017-2019, Sander Bollen, Leiden University Medical Center
# Copyright (C) 2017-2019, Leiden University Medical Center
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
......@@ -36,8 +36,13 @@ with open(srcdir('config/schema.json'), 'rt') as fin:
try:
jsonschema.validate(config, schema)
except jsonschema.ValidationError as e:
raise jsonschema.ValidationError(f'Invalid CONFIG_JSON: {e}')
raise jsonschema.ValidationError(f'Invalid --configfile: {e.message}')
# If you specify a baitsfile, you also have to specify a targets file for
# picard
if "baitsfile" in config and "targetsfile" not in config:
msg = 'Invalid --configfile: "baitsfile" specified without "targetsfile"'
raise jsonschema.ValidationError(msg)
# Set default values
def set_default(key, value):
......@@ -380,7 +385,7 @@ rule covstats:
bam = rules.markdup.output.bam,
genome = "current.genome",
covpy = config["covstats"],
bed = config.get("bedfile","")
bed = config.get("targetsfile","")
params:
subt = "Sample {sample}"
output:
......@@ -423,7 +428,7 @@ rule collectstats:
mbnum = rules.mapped_reads_bases.output.bases,
unum = rules.unique_reads_bases.output.reads,
ubnum = rules.unique_reads_bases.output.bases,
cov = rules.covstats.output.covj if "bedfile" in config else [],
cov = rules.covstats.output.covj if "targetsfile" in config else [],
cutadapt = rules.collect_cutadapt_summary.output,
colpy = config["collect_stats"]
params:
......@@ -461,7 +466,7 @@ rule bed_to_interval:
picard can read
"""
input:
targets = config.get("bedfile",""),
targets = config.get("targetsfile",""),
baits = config.get("baitsfile",""),
ref = config["reference"]
output:
......
......@@ -59,12 +59,12 @@
"description": "Fraction of reads between X and the autosomes to call as female",
"type": "number"
},
"bedfile": {
"description": "Bed file to calculate the coverage over",
"targetsfile": {
"description": "Bed file of the targets of the capture kit. Used to calculate coverage",
"type": "string"
},
"baitsfile": {
"description": "Bed file of the baits of the capture kit",
"description": "Bed file of the baits of the capture kit. Used to calculate picard HsMetrics",
"type": "string"
},
"refflat": {
......
{
"samples": {
"micro": {
"read_groups": {
"lib_01": {
"R1": "tests/data/fastq/micro_R1.fq.gz",
"R2": "tests/data/fastq/micro_R2.fq.gz"
}
}
}
},
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"reference": "tests/data/reference/ref.fa",
"baitsfile": "tests/data/reference/target_baits.bed"
}
......@@ -12,5 +12,5 @@
"reference":"tests/data/reference/ref.fa",
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"bedfile": "tests/data/reference/full_chrM.bed"
"targetsfile": "tests/data/reference/full_chrM.bed"
}
......@@ -12,5 +12,5 @@
"reference":"tests/data/reference/ref.fa",
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"bedfile": "tests/data/reference/target_genes.bed"
"targetsfile": "tests/data/reference/target_genes.bed"
}
......@@ -12,6 +12,6 @@
"reference":"tests/data/reference/ref.fa",
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"bedfile": "tests/data/reference/target_genes.bed",
"targetsfile": "tests/data/reference/target_genes.bed",
"baitsfile": "tests/data/reference/target_baits.bed"
}
......@@ -16,5 +16,5 @@
"reference":"tests/data/reference/ref.fa",
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"bedfile": "tests/data/reference/full_chrM.bed"
"targetsfile": "tests/data/reference/full_chrM.bed"
}
......@@ -20,5 +20,5 @@
"reference":"tests/data/reference/ref.fa",
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"bedfile": "tests/data/reference/full_chrM.bed"
"targetsfile": "tests/data/reference/full_chrM.bed"
}
......@@ -6,7 +6,7 @@
exit_code: 1
stdout:
contains:
- "Invalid CONFIG_JSON: 'reference' is a required property"
- "Invalid --configfile: 'reference' is a required property"
tags:
- sanity
......@@ -38,3 +38,14 @@
- "singularity version 3"
tags:
- sanity
- name: test-baits-only
tags:
- sanity
command: >
snakemake -n Snakefile
--configfile tests/data/config/invalid_config_baitsfile_only.json
exit_code: 1
stdout:
contains:
- 'Invalid --configfile: "baitsfile" specified without "targetsfile"'
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment