Commit 142fa3cc authored by van den Berg's avatar van den Berg
Browse files

Remove legacy support for conda

parent 45387234
......@@ -16,7 +16,7 @@ GATK HaplotypeCaller.
* No unnecessary jobs
* Coverage metrics for any number of bed files.
* Fully containerized rules through singularity and biocontainers. Legacy
conda environments are available as well.
conda environments are no longer available.
* Optionally sub-sample inputs when number of bases exceeds a user-defined
threshold.
......@@ -47,10 +47,9 @@ pip install -r requirements.txt
We highly recommend the use of the containerized rules through
[singularity](https://www.sylabs.io/singularity/).
This option does, however,
require you to install singularity on your system. As this usually requires
administrative privileges, singularity is not contained within our provided
conda environment file.
This option does require you to install singularity on your system. As this
usually requires administrative privileges, singularity is not contained
within our provided conda environment file.
If you want to use singularity, make sure you install version 3 or higher.
......@@ -225,21 +224,6 @@ FASTQ_COUNT=/path/to/fastq-count \
BED=/path/to/interesting_region.bed
```
## Using conda instead of singularity
Legacy conda environments are also available for each and every rule.
Simply use `--use-conda` instead of `--use-singularity` to enable conda
environments.
As dependency conflicts can and do arise with conda, it is recommended to
combine this flag with `--conda-prefix`, such that you only have to
build the environments once.
The conda environments use the same versions of tools as the singularity
containers, bar one:
* `fastqc` uses version 0.11.5 on conda, but 0.11.7 on singularity.
# Graph
Below you can see the rulegraph of the pipeline. The main variant calling flow
......
......@@ -231,7 +231,6 @@ rule seqtk_r1:
max_bases=str(MAX_BASES)
output:
fastq=temp("{sample}/pre_process/{sample}.sampled_R1.fastq.gz")
conda: "envs/seqtk.yml"
singularity: "docker://quay.io/biocontainers/mulled-v2-13686261ac0aa5682c680670ff8cda7b09637943:d143450dec169186731bb4df6f045a3c9ee08eb6-0"
shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} "
"{params.max_bases}"
......@@ -247,7 +246,6 @@ rule seqtk_r2:
max_bases =str(MAX_BASES)
output:
fastq = temp("{sample}/pre_process/{sample}.sampled_R2.fastq.gz")
conda: "envs/seqtk.yml"
singularity: "docker://quay.io/biocontainers/mulled-v2-13686261ac0aa5682c680670ff8cda7b09637943:d143450dec169186731bb4df6f045a3c9ee08eb6-0"
shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} "
"{params.max_bases}"
......@@ -266,7 +264,6 @@ rule sickle:
r2 = temp("{sample}/pre_process/{sample}.trimmed_R2.fastq"),
s = "{sample}/pre_process/{sample}.trimmed_singles.fastq"
singularity: "docker://quay.io/biocontainers/sickle-trim:1.33--ha92aebf_4"
conda: "envs/sickle.yml"
shell: "sickle pe -f {input.r1} -r {input.r2} -t sanger -o {output.r1} "
"-p {output.r2} -s {output.s}"
......@@ -279,7 +276,6 @@ rule cutadapt:
r1 = temp("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
r2 = temp("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
singularity: "docker://quay.io/biocontainers/cutadapt:1.14--py36_0"
conda: "envs/cutadapt.yml"
shell: "cutadapt -a AGATCGGAAGAG -A AGATCGGAAGAG -m 1 -o {output.r1} "
"{input.r1} -p {output.r2} {input.r2}"
......@@ -294,7 +290,6 @@ rule align:
rg = "@RG\\tID:{sample}_lib1\\tSM:{sample}\\tPL:ILLUMINA"
output: temp("{sample}/bams/{sample}.sorted.bam")
singularity: "docker://quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0"
conda: "envs/bwa.yml"
shell: "bwa mem -t 8 -R '{params.rg}' {input.ref} {input.r1} {input.r2} "
"| picard -Xmx4G SortSam CREATE_INDEX=TRUE TMP_DIR={input.temp} "
"INPUT=/dev/stdin OUTPUT={output} SORT_ORDER=coordinate"
......@@ -309,7 +304,6 @@ rule markdup:
bai = "{sample}/bams/{sample}.markdup.bai",
metrics = "{sample}/bams/{sample}.markdup.metrics"
singularity: "docker://quay.io/biocontainers/picard:2.14--py36_0"
conda: "envs/picard.yml"
shell: "picard -Xmx4G MarkDuplicates CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
"INPUT={input.bam} OUTPUT={output.bam} "
"METRICS_FILE={output.metrics} "
......@@ -336,7 +330,6 @@ rule baserecal:
output:
grp = "{sample}/bams/{sample}.baserecal.grp"
singularity: "docker://quay.io/biocontainers/gatk:3.7--py36_1"
conda: "envs/gatk.yml"
shell: "java -XX:ParallelGCThreads=1 -jar {input.gatk} -T "
"BaseRecalibrator -I {input.bam} -o {output.grp} -nct 8 "
"-R {input.ref} -cov ReadGroupCovariate -cov QualityScoreCovariate "
......@@ -358,7 +351,6 @@ rule gvcf_scatter:
gvcf=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz"),
gvcf_tbi=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz.tbi")
singularity: "docker://quay.io/biocontainers/gatk:3.7--py36_1"
conda: "envs/gatk.yml"
shell: "java -jar -Xmx4G -XX:ParallelGCThreads=1 {input.gatk} "
"-T HaplotypeCaller -ERC GVCF -I "
"{input.bam} -R {input.ref} -D {input.dbsnp} "
......@@ -376,7 +368,7 @@ rule gvcf_chunkfile:
an "argument list too long" error.
See https://unix.stackexchange.com/a/120842 for more info
This also means this rule lives outside of singularity/conda and is
This also means this rule lives outside of singularity and is
executed in snakemake's own environment.
"""
params:
......@@ -398,7 +390,6 @@ rule gvcf_gather:
chunkfile = "{sample}/vcf/chunkfile.txt"
output:
gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
conda: "envs/bcftools.yml"
singularity: "docker://quay.io/biocontainers/bcftools:1.9--ha228f0b_4"
shell: "bcftools concat -f {input.chunkfile} -n > {output.gvcf}"
......@@ -409,7 +400,6 @@ rule gvcf_gather_tbi:
gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
output:
tbi = "{sample}/vcf/{sample}.g.vcf.gz.tbi"
conda: "envs/tabix.yml"
singularity: "docker://quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
shell: "tabix -pvcf {input.gvcf}"
......@@ -430,7 +420,6 @@ rule genotype_scatter:
vcf=temp("multisample/genotype.{chunk}.part.vcf.gz"),
vcf_tbi=temp("multisample/genotype.{chunk}.part.vcf.gz.tbi")
singularity: "docker://quay.io/biocontainers/gatk:3.7--py36_1"
conda: "envs/gatk.yml"
shell: "java -jar -Xmx15G -XX:ParallelGCThreads=1 {input.gatk} -T "
"GenotypeGVCFs -R {input.ref} "
"-V {params.li} -L '{params.chunk}' -o '{output.vcf}'"
......@@ -445,7 +434,7 @@ rule genotype_chunkfile:
an "argument list too long" error.
See https://unix.stackexchange.com/a/120842 for more info
This also means this rule lives outside of singularity/conda and is
This also means this rule lives outside of singularity and is
executed in snakemake's own environment.
"""
params:
......@@ -467,7 +456,6 @@ rule genotype_gather:
chunkfile = "multisample/chunkfile.txt"
output:
vcf = "multisample/genotyped.vcf.gz"
conda: "envs/bcftools.yml"
singularity: "docker://quay.io/biocontainers/bcftools:1.9--ha228f0b_4"
shell: "bcftools concat -f {input.chunkfile} -n > {output.vcf}"
......@@ -478,7 +466,6 @@ rule genotype_gather_tbi:
vcf = "multisample/genotyped.vcf.gz"
output:
tbi = "multisample/genotyped.vcf.gz.tbi"
conda: "envs/tabix.yml"
singularity: "docker://quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
shell: "tabix -pvcf {input.vcf}"
......@@ -495,7 +482,6 @@ rule split_vcf:
output:
splitted="{sample}/vcf/{sample}_single.vcf.gz"
singularity: "docker://quay.io/biocontainers/gatk:3.7--py36_1"
conda: "envs/gatk.yml"
shell: "java -Xmx15G -XX:ParallelGCThreads=1 -jar {input.gatk} "
"-T SelectVariants -sn {params.s} -env -R {input.ref} -V "
"{input.vcf} -o {output.splitted}"
......@@ -510,7 +496,6 @@ rule mapped_num:
output:
num="{sample}/bams/{sample}.mapped.num"
singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
conda: "envs/samtools.yml"
shell: "samtools view -F 4 {input.bam} | wc -l > {output.num}"
......@@ -521,7 +506,6 @@ rule mapped_basenum:
output:
num="{sample}/bams/{sample}.mapped.basenum"
singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
conda: "envs/samtools.yml"
shell: "samtools view -F 4 {input.bam} | cut -f10 | wc -c > {output.num}"
......@@ -532,7 +516,6 @@ rule unique_num:
output:
num="{sample}/bams/{sample}.unique.num"
singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
conda: "envs/samtools.yml"
shell: "samtools view -F 4 -F 1024 {input.bam} | wc -l > {output.num}"
......@@ -543,7 +526,6 @@ rule usable_basenum:
output:
num="{sample}/bams/{sample}.usable.basenum"
singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
conda: "envs/samtools.yml"
shell: "samtools view -F 4 -F 1024 {input.bam} | cut -f10 | wc -c > "
"{output.num}"
......@@ -564,7 +546,6 @@ rule fastqc_raw:
output:
aux="{sample}/pre_process/raw_fastqc/.done.txt"
singularity: "docker://quay.io/biocontainers/fastqc:0.11.7--4"
conda: "envs/fastqc.yml"
shell: "fastqc --nogroup -o {params.odir} {input.r1} {input.r2} "
"&& echo 'done' > {output.aux}"
......@@ -585,7 +566,6 @@ rule fastqc_merged:
r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip"
singularity: "docker://quay.io/biocontainers/fastqc:0.11.7--4"
conda: "envs/fastqc.yml"
shell: "bash {input.fq} {input.r1} {input.r2} "
"{output.r1} {output.r2} {params.odir}"
......@@ -606,7 +586,6 @@ rule fastqc_postqc:
r1="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip"
singularity: "docker://quay.io/biocontainers/fastqc:0.11.7--4"
conda: "envs/fastqc.yml"
shell: "bash {input.fq} {input.r1} {input.r2} "
"{output.r1} {output.r2} {params.odir}"
......@@ -621,7 +600,6 @@ rule fqcount_preqc:
output:
"{sample}/pre_process/{sample}.preqc_count.json"
singularity: "docker://quay.io/biocontainers/fastq-count:0.1.0--h14c3975_0"
conda: "envs/fastq-count.yml"
shell: "fastq-count {input.r1} {input.r2} > {output}"
......@@ -633,7 +611,6 @@ rule fqcount_postqc:
output:
"{sample}/pre_process/{sample}.postqc_count.json"
singularity: "docker://quay.io/biocontainers/fastq-count:0.1.0--h14c3975_0"
conda: "envs/fastq-count.yml"
shell: "fastq-count {input.r1} {input.r2} > {output}"
......@@ -647,7 +624,6 @@ rule fastqc_stats:
postqc_r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip",
sc=fqpy
singularity: "docker://python:3.6-slim"
conda: "envs/collectstats.yml"
output:
"{sample}/pre_process/fastq_stats.json"
shell: "python {input.sc} --preqc-r1 {input.preqc_r1} "
......@@ -670,7 +646,6 @@ rule covstats:
covj="{sample}/coverage/{bed}.covstats.json",
covp="{sample}/coverage/{bed}.covstats.png"
singularity: "docker://quay.io/biocontainers/mulled-v2-3251e6c49d800268f0bc575f28045ab4e69475a6:4ce073b219b6dabb79d154762a9b67728c357edb-0"
conda: "envs/covstat.yml"
shell: "bedtools coverage -sorted -g {input.genome} -a {input.bed} "
"-b {input.bam} -d | python {input.covpy} - --plot {output.covp} "
"--title 'Targets coverage' --subtitle '{params.subt}' "
......@@ -686,7 +661,6 @@ rule vtools_coverage:
output:
tsv="{sample}/coverage/{ref}.coverages.tsv"
singularity: "docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0"
conda: "envs/vcfstats.yml"
shell: "vtools-gcoverage -I {input.gvcf} -R {input.ref} > {output.tsv}"
......@@ -700,7 +674,6 @@ rule vcfstats:
output:
stats="multisample/vcfstats.json"
singularity: "docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0"
conda: "envs/vcfstats.yml"
shell: "vtools-stats -i {input.vcf} > {output.stats}"
......@@ -725,7 +698,6 @@ if len(BASE_BEDS) >= 1:
output:
"{sample}/{sample}.stats.json"
singularity: "docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0"
conda: "envs/collectstats.yml"
shell: "python {input.colpy} --sample-name {params.sample_name} "
"--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
"--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
......@@ -750,7 +722,6 @@ else:
output:
"{sample}/{sample}.stats.json"
singularity: "docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0"
conda: "envs/collectstats.yml"
shell: "python {input.colpy} --sample-name {params.sample_name} "
"--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
"--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
......@@ -767,7 +738,6 @@ rule merge_stats:
output:
stats="stats.json"
singularity: "docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0"
conda: "envs/collectstats.yml"
shell: "python {input.mpy} --vcfstats {input.vstat} {input.cols} "
"> {output.stats}"
......@@ -780,7 +750,6 @@ rule stats_tsv:
output:
stats="stats.tsv"
singularity: "docker://python:3.6-slim"
conda: "envs/collectstats.yml"
shell: "python {input.sc} -i {input.stats} > {output.stats}"
......@@ -797,5 +766,4 @@ rule multiqc:
output:
report="multiqc_report/multiqc_report.html"
singularity: "docker://quay.io/biocontainers/multiqc:1.5--py36_0"
conda: "envs/multiqc.yml"
shell: "multiqc -f -o {params.rdir} {params.odir} || touch {output.report}"
- name: test-integration-run
command: >-
bash -c '
snakemake --use-conda --conda-prefix ${CONDA_PREFIX} --jobs 100 -w 120
--cluster "sbatch --parsable"
--cluster-status "${PWD}/cluster/slurm-cluster-status/slurm-cluster-status.py"
snakemake
--use-singularity
--singularity-prefix /tmp/singularity
--jobs 10 -w 120
-r -p -s Snakefile
--config JAVA=$(which java)
REFERENCE=${FUNCTIONAL_REF} GATK=${GATK_JAR}
DBSNP=${FUNCTIONAL_DBSNP}
ONETHOUSAND=${FUNCTIONAL_ONETHOUSAND}
HAPMAP=${FUNCTIONAL_HAPMAP}
SAMPLE_CONFIG=${FUNCTIONAL_SAMPLE_CONFIG}'
--config
REFERENCE=tests/data/ref.fa
GATK=tests/GenomeAnalysisTK.jar
DBSNP=tests/data/database.vcf.gz
ONETHOUSAND=tests/data/database.vcf.gz
HAPMAP=tests/data/database.vcf.gz
SAMPLE_CONFIG=tests/data/sample_config.json
exit_code: 0
stderr:
contains:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment