Commit a62d53f9 authored by Sander Bollen

remove out_dir calls

parent 1a7c51a9
Pipeline #2446 failed with stage in 1 minute and 7 seconds
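This commit drops the out_path() helper, which joined OUT_DIR onto every rule path, in favour of plain relative paths. A minimal before/after sketch of the pattern being removed (the config key and rule name are illustrative, not part of this commit):

    from os.path import join

    # Old pattern: a configured output directory was joined onto every path.
    OUT_DIR = config.get("output_dir", ".")  # assumed config key

    def out_path(path):
        return join(OUT_DIR, path)

    rule stats:
        # Before: output: out_path("stats.json")
        # After: a plain relative path, resolved against Snakemake's
        # working directory at run time.
        output: "stats.json"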
@@ -148,8 +148,6 @@ def split_genome(ref, approx_n_chunks=100):
 CHUNKS = split_genome(REFERENCE)
 
-def out_path(path):
-    return join(OUT_DIR, path)
 
 def get_r(strand, wildcards):
     """Get fastq files on a single strand for a sample"""
@@ -183,66 +181,66 @@ def metrics(do_metrics=True):
     if not do_metrics:
         return ""
 
-    fqcr = expand(out_path("{sample}/pre_process/raw_fastqc/.done.txt"),
+    fqcr = expand("{sample}/pre_process/raw_fastqc/.done.txt",
                   sample=SAMPLES)
-    fqcm = expand(out_path("{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip"),
+    fqcm = expand("{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
                   sample=SAMPLES)
-    fqcp = expand(out_path("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip"),
+    fqcp = expand("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
                   sample=SAMPLES)
     if len(REFFLATS) >= 1:
-        coverage_stats = expand(out_path("{sample}/coverage/{ref}.coverages.tsv"),
+        coverage_stats = expand("{sample}/coverage/{ref}.coverages.tsv",
                                 sample=SAMPLES, ref=BASE_REFFLATS)
     else:
         coverage_stats = []
-    stats = out_path("stats.json")
+    stats = "stats.json"
     return fqcr + fqcm + fqcp + coverage_stats + [stats]
 
 rule all:
     input:
-        combined=out_path("multisample/genotyped.vcf.gz"),
-        report=out_path("multiqc_report/multiqc_report.html"),
-        bais=expand(out_path("{sample}/bams/{sample}.markdup.bam.bai"),
+        combined="multisample/genotyped.vcf.gz",
+        report="multiqc_report/multiqc_report.html",
+        bais=expand("{sample}/bams/{sample}.markdup.bam.bai",
                     sample=SAMPLES),
-        vcfs=expand(out_path("{sample}/vcf/{sample}_single.vcf.gz"),
+        vcfs=expand("{sample}/vcf/{sample}_single.vcf.gz",
                     sample=SAMPLES),
         stats=metrics()
 
 rule create_markdup_tmp:
     """Create tmp directory for mark duplicates"""
-    output: ancient(out_path("tmp"))
+    output: ancient("tmp")
     shell: "mkdir -p {output}"
 
 rule genome:
     """Create genome file as used by bedtools"""
     input: REFERENCE
-    output: out_path("current.genome")
+    output: "current.genome"
     shell: "awk -v OFS='\t' {{'print $1,$2'}} {input}.fai > {output}"
 
 rule merge_r1:
     """Merge all forward fastq files into one"""
     input: get_r1
-    output: temp(out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"))
+    output: temp("{sample}/pre_process/{sample}.merged_R1.fastq.gz")
     shell: "cat {input} > {output}"
 
 rule merge_r2:
     """Merge all reverse fastq files into one"""
     input: get_r2
-    output: temp(out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz"))
+    output: temp("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
     shell: "cat {input} > {output}"
 
 rule seqtk_r1:
     """Either subsample or link forward fastq file"""
     input:
-        stats=out_path("{sample}/pre_process/{sample}.preqc_count.json"),
-        fastq=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
+        stats="{sample}/pre_process/{sample}.preqc_count.json",
+        fastq="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
         seqtk=seq
     params:
         max_bases=str(MAX_BASES)
     output:
-        fastq=temp(out_path("{sample}/pre_process/{sample}.sampled_R1.fastq.gz"))
+        fastq=temp("{sample}/pre_process/{sample}.sampled_R1.fastq.gz")
     conda: "envs/seqtk.yml"
     shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} {params.max_bases}"
@@ -250,13 +248,13 @@ rule seqtk_r1:
 rule seqtk_r2:
     """Either subsample or link reverse fastq file"""
     input:
-        stats = out_path("{sample}/pre_process/{sample}.preqc_count.json"),
-        fastq = out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz"),
+        stats = "{sample}/pre_process/{sample}.preqc_count.json",
+        fastq = "{sample}/pre_process/{sample}.merged_R2.fastq.gz",
         seqtk=seq
     params:
         max_bases = str(MAX_BASES)
     output:
-        fastq = temp(out_path("{sample}/pre_process/{sample}.sampled_R2.fastq.gz"))
+        fastq = temp("{sample}/pre_process/{sample}.sampled_R2.fastq.gz")
     conda: "envs/seqtk.yml"
     shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} {params.max_bases}"
@@ -265,14 +263,14 @@ rule seqtk_r2:
 rule sickle:
     """Trim fastq files"""
     input:
-        r1 = out_path("{sample}/pre_process/{sample}.sampled_R1.fastq.gz"),
-        r2 = out_path("{sample}/pre_process/{sample}.sampled_R2.fastq.gz"),
-        rr1 = out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
-        rr2 = out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
+        r1 = "{sample}/pre_process/{sample}.sampled_R1.fastq.gz",
+        r2 = "{sample}/pre_process/{sample}.sampled_R2.fastq.gz",
+        rr1 = "{sample}/pre_process/{sample}.merged_R1.fastq.gz",
+        rr2 = "{sample}/pre_process/{sample}.merged_R2.fastq.gz"
     output:
-        r1 = temp(out_path("{sample}/pre_process/{sample}.trimmed_R1.fastq")),
-        r2 = temp(out_path("{sample}/pre_process/{sample}.trimmed_R2.fastq")),
-        s = out_path("{sample}/pre_process/{sample}.trimmed_singles.fastq"),
+        r1 = temp("{sample}/pre_process/{sample}.trimmed_R1.fastq"),
+        r2 = temp("{sample}/pre_process/{sample}.trimmed_R2.fastq"),
+        s = "{sample}/pre_process/{sample}.trimmed_singles.fastq",
     conda: "envs/sickle.yml"
     shell: "sickle pe -f {input.r1} -r {input.r2} -t sanger -o {output.r1} "
            "-p {output.r2} -s {output.s}"
@@ -280,11 +278,11 @@ rule sickle:
 rule cutadapt:
     """Clip fastq files"""
     input:
-        r1 = out_path("{sample}/pre_process/{sample}.trimmed_R1.fastq"),
-        r2 = out_path("{sample}/pre_process/{sample}.trimmed_R2.fastq")
+        r1 = "{sample}/pre_process/{sample}.trimmed_R1.fastq",
+        r2 = "{sample}/pre_process/{sample}.trimmed_R2.fastq"
     output:
-        r1 = temp(out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq")),
-        r2 = temp(out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq"))
+        r1 = temp("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
+        r2 = temp("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
     conda: "envs/cutadapt.yml"
     shell: "cutadapt -a AGATCGGAAGAG -A AGATCGGAAGAG -m 1 -o {output.r1} "
            "{input.r1} -p {output.r2} {input.r2}"
@@ -292,12 +290,12 @@ rule cutadapt:
 rule align:
     """Align fastq files"""
     input:
-        r1 = out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
-        r2 = out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq"),
+        r1 = "{sample}/pre_process/{sample}.cutadapt_R1.fastq",
+        r2 = "{sample}/pre_process/{sample}.cutadapt_R2.fastq",
         ref = REFERENCE
     params:
         rg = "@RG\\tID:{sample}_lib1\\tSM:{sample}\\tPL:ILLUMINA"
-    output: temp(out_path("{sample}/bams/{sample}.sorted.bam"))
+    output: temp("{sample}/bams/{sample}.sorted.bam")
     conda: "envs/bwa.yml"
     shell: "bwa mem -t 8 -R '{params.rg}' {input.ref} {input.r1} {input.r2} "
            "| picard SortSam CREATE_INDEX=TRUE TMP_DIR=null "
@@ -306,12 +304,12 @@ rule align:
 rule markdup:
     """Mark duplicates in BAM file"""
     input:
-        bam = out_path("{sample}/bams/{sample}.sorted.bam"),
-        tmp = ancient(out_path("tmp"))
+        bam = "{sample}/bams/{sample}.sorted.bam",
+        tmp = ancient("tmp")
     output:
-        bam = out_path("{sample}/bams/{sample}.markdup.bam"),
-        bai = out_path("{sample}/bams/{sample}.markdup.bai"),
-        metrics = out_path("{sample}/bams/{sample}.markdup.metrics")
+        bam = "{sample}/bams/{sample}.markdup.bam",
+        bai = "{sample}/bams/{sample}.markdup.bai",
+        metrics = "{sample}/bams/{sample}.markdup.metrics"
     conda: "envs/picard.yml"
     shell: "picard MarkDuplicates CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
            "INPUT={input.bam} OUTPUT={output.bam} "
@@ -321,15 +319,15 @@ rule markdup:
 rule bai:
     """Copy bai files as some genome browsers can only use .bam.bai files"""
     input:
-        bai = out_path("{sample}/bams/{sample}.markdup.bai")
+        bai = "{sample}/bams/{sample}.markdup.bai"
     output:
-        bai = out_path("{sample}/bams/{sample}.markdup.bam.bai")
+        bai = "{sample}/bams/{sample}.markdup.bam.bai"
     shell: "cp {input.bai} {output.bai}"
 
 rule baserecal:
     """Base recalibrated BAM files"""
     input:
-        bam = out_path("{sample}/bams/{sample}.markdup.bam"),
+        bam = "{sample}/bams/{sample}.markdup.bam",
         java = JAVA,
         gatk = GATK,
         ref = REFERENCE,
@@ -337,7 +335,7 @@ rule baserecal:
         one1kg = ONETHOUSAND,
         hapmap = HAPMAP
     output:
-        grp = out_path("{sample}/bams/{sample}.baserecal.grp")
+        grp = "{sample}/bams/{sample}.baserecal.grp"
     conda: "envs/gatk.yml"
     shell: "{input.java} -XX:ParallelGCThreads=1 -jar {input.gatk} -T BaseRecalibrator "
            "-I {input.bam} -o {output.grp} -nct 8 -R {input.ref} "
@@ -349,16 +347,16 @@ rule baserecal:
 rule gvcf_scatter:
     """Run HaplotypeCaller in GVCF mode by chunk"""
     input:
-        bam=out_path("{sample}/bams/{sample}.markdup.bam"),
-        bqsr=out_path("{sample}/bams/{sample}.baserecal.grp"),
+        bam="{sample}/bams/{sample}.markdup.bam",
+        bqsr="{sample}/bams/{sample}.baserecal.grp",
         dbsnp=DBSNP,
         ref=REFERENCE,
         gatk=GATK
     params:
         chunk="{chunk}"
     output:
-        gvcf=temp(out_path("{sample}/vcf/{sample}.{chunk}.part.vcf.gz")),
-        gvcf_tbi=temp(out_path("{sample}/vcf/{sample}.{chunk}.part.vcf.gz.tbi"))
+        gvcf=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz"),
+        gvcf_tbi=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz.tbi")
     conda: "envs/gatk.yml"
     shell: "java -jar -Xmx4G -XX:ParallelGCThreads=1 {input.gatk} -T HaplotypeCaller -ERC GVCF -I "
            "{input.bam} -R {input.ref} -D {input.dbsnp} "
@@ -370,17 +368,17 @@ rule gvcf_scatter:
 rule gvcf_gather:
     """Gather all gvcf scatters"""
     input:
-        gvcfs=expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"),
+        gvcfs=expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
                      chunk=CHUNKS),
-        tbis=expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz.tbi"),
+        tbis=expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz.tbi",
                     chunk=CHUNKS),
         ref=REFERENCE,
         gatk=GATK
     params:
-        gvcfs="' -V '".join(expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"),
+        gvcfs="' -V '".join(expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
                                    chunk=CHUNKS))
     output:
-        gvcf=out_path("{sample}/vcf/{sample}.g.vcf.gz")
+        gvcf="{sample}/vcf/{sample}.g.vcf.gz"
     conda: "envs/gatk.yml"
     shell: "java -Xmx4G -XX:ParallelGCThreads=1 -cp {input.gatk} org.broadinstitute.gatk.tools.CatVariants "
            "-R {input.ref} -V '{params.gvcfs}' -out {output.gvcf} "
@@ -390,17 +388,17 @@ rule gvcf_gather:
 rule genotype_scatter:
     """Run GATK's GenotypeGVCFs by chunk"""
    input:
-        gvcfs = expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"),
+        gvcfs = expand("{sample}/vcf/{sample}.g.vcf.gz",
                        sample=SAMPLES),
         ref=REFERENCE,
         gatk=GATK
     params:
-        li=" -V ".join(expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"),
+        li=" -V ".join(expand("{sample}/vcf/{sample}.g.vcf.gz",
                               sample=SAMPLES)),
         chunk="{chunk}"
     output:
-        vcf=temp(out_path("multisample/genotype.{chunk}.part.vcf.gz")),
-        vcf_tbi=temp(out_path("multisample/genotype.{chunk}.part.vcf.gz.tbi"))
+        vcf=temp("multisample/genotype.{chunk}.part.vcf.gz"),
+        vcf_tbi=temp("multisample/genotype.{chunk}.part.vcf.gz.tbi")
     conda: "envs/gatk.yml"
     shell: "java -jar -Xmx15G -XX:ParallelGCThreads=1 {input.gatk} -T GenotypeGVCFs -R {input.ref} "
            "-V {params.li} -L '{params.chunk}' -o '{output.vcf}'"
@@ -409,17 +407,17 @@ rule genotype_scatter:
 rule genotype_gather:
     """Gather all genotyping scatters"""
     input:
-        vcfs=expand(out_path("multisample/genotype.{chunk}.part.vcf.gz"),
+        vcfs=expand("multisample/genotype.{chunk}.part.vcf.gz",
                     chunk=CHUNKS),
-        tbis=expand(out_path("multisample/genotype.{chunk}.part.vcf.gz.tbi"),
+        tbis=expand("multisample/genotype.{chunk}.part.vcf.gz.tbi",
                     chunk=CHUNKS),
         ref=REFERENCE,
         gatk=GATK
     params:
-        vcfs="' -V '".join(expand(out_path("multisample/genotype.{chunk}.part.vcf.gz"),
+        vcfs="' -V '".join(expand("multisample/genotype.{chunk}.part.vcf.gz",
                                   chunk=CHUNKS))
     output:
-        combined=out_path("multisample/genotyped.vcf.gz")
+        combined="multisample/genotyped.vcf.gz"
     conda: "envs/gatk.yml"
     shell: "java -Xmx4G -XX:ParallelGCThreads=1 -cp {input.gatk} org.broadinstitute.gatk.tools.CatVariants "
            "-R {input.ref} -V '{params.vcfs}' -out {output.combined} "
@@ -429,13 +427,13 @@ rule genotype_gather:
 rule split_vcf:
     """Split multisample VCF into single samples"""
     input:
-        vcf=out_path("multisample/genotyped.vcf.gz"),
+        vcf="multisample/genotyped.vcf.gz",
         gatk=GATK,
         ref=REFERENCE
     params:
         s="{sample}"
     output:
-        splitted=out_path("{sample}/vcf/{sample}_single.vcf.gz")
+        splitted="{sample}/vcf/{sample}_single.vcf.gz"
     conda: "envs/gatk.yml"
     shell: "java -Xmx15G -XX:ParallelGCThreads=1 -jar {input.gatk} -T SelectVariants -sn "
            "{params.s} -env -R {input.ref} -V {input.vcf} -o {output.splitted}"
@@ -446,9 +444,9 @@ rule split_vcf:
 rule mapped_num:
     """Calculate number of mapped reads"""
     input:
-        bam=out_path("{sample}/bams/{sample}.sorted.bam")
+        bam="{sample}/bams/{sample}.sorted.bam"
     output:
-        num=out_path("{sample}/bams/{sample}.mapped.num")
+        num="{sample}/bams/{sample}.mapped.num"
     conda: "envs/samtools.yml"
     shell: "samtools view -F 4 {input.bam} | wc -l > {output.num}"
@@ -456,9 +454,9 @@ rule mapped_num:
 rule mapped_basenum:
     """Calculate number of mapped bases"""
     input:
-        bam=out_path("{sample}/bams/{sample}.sorted.bam")
+        bam="{sample}/bams/{sample}.sorted.bam"
     output:
-        num=out_path("{sample}/bams/{sample}.mapped.basenum")
+        num="{sample}/bams/{sample}.mapped.basenum"
     conda: "envs/samtools.yml"
     shell: "samtools view -F 4 {input.bam} | cut -f10 | wc -c > {output.num}"
@@ -466,9 +464,9 @@ rule mapped_basenum:
 rule unique_num:
     """Calculate number of unique reads"""
     input:
-        bam=out_path("{sample}/bams/{sample}.markdup.bam")
+        bam="{sample}/bams/{sample}.markdup.bam"
     output:
-        num=out_path("{sample}/bams/{sample}.unique.num")
+        num="{sample}/bams/{sample}.unique.num"
     conda: "envs/samtools.yml"
     shell: "samtools view -F 4 -F 1024 {input.bam} | wc -l > {output.num}"
@@ -476,9 +474,9 @@ rule unique_num:
 rule usable_basenum:
     """Calculate number of bases on unique reads"""
     input:
-        bam=out_path("{sample}/bams/{sample}.markdup.bam")
+        bam="{sample}/bams/{sample}.markdup.bam"
     output:
-        num=out_path("{sample}/bams/{sample}.usable.basenum")
+        num="{sample}/bams/{sample}.usable.basenum"
     conda: "envs/samtools.yml"
     shell: "samtools view -F 4 -F 1024 {input.bam} | cut -f10 | wc -c > {output.num}"
@@ -491,9 +489,9 @@ rule fastqc_raw:
         r1=get_r1,
         r2=get_r2
     params:
-        odir=out_path("{sample}/pre_process/raw_fastqc")
+        odir="{sample}/pre_process/raw_fastqc"
     output:
-        aux=out_path("{sample}/pre_process/raw_fastqc/.done.txt")
+        aux="{sample}/pre_process/raw_fastqc/.done.txt"
     conda: "envs/fastqc.yml"
     shell: "fastqc --nogroup -o {params.odir} {input.r1} {input.r2} && echo 'done' > {output.aux}"
@@ -501,14 +499,14 @@ rule fastqc_raw:
 rule fastqc_merged:
     """Run fastqc on merged fastq files"""
     input:
-        r1=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
-        r2=out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz"),
+        r1="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
+        r2="{sample}/pre_process/{sample}.merged_R2.fastq.gz",
         fq=fqsc
     params:
-        odir=out_path("{sample}/pre_process/merged_fastqc")
+        odir="{sample}/pre_process/merged_fastqc"
     output:
-        r1=out_path("{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip"),
-        r2=out_path("{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip")
+        r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
+        r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip"
     conda: "envs/fastqc.yml"
     shell: "bash {input.fq} {input.r1} {input.r2} "
            "{output.r1} {output.r2} {params.odir}"
@@ -517,14 +515,14 @@ rule fastqc_merged:
 rule fastqc_postqc:
     """Run fastqc on fastq files post pre-processing"""
     input:
-        r1=out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
-        r2=out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq"),
+        r1="{sample}/pre_process/{sample}.cutadapt_R1.fastq",
+        r2="{sample}/pre_process/{sample}.cutadapt_R2.fastq",
         fq=fqsc
     params:
-        odir=out_path("{sample}/pre_process/postqc_fastqc")
+        odir="{sample}/pre_process/postqc_fastqc"
     output:
-        r1=out_path("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip"),
-        r2=out_path("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip")
+        r1="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
+        r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip"
     conda: "envs/fastqc.yml"
     shell: "bash {input.fq} {input.r1} {input.r2} "
            "{output.r1} {output.r2} {params.odir}"
@@ -535,24 +533,24 @@ rule fastqc_postqc:
 
 rule fqcount_preqc:
     """Calculate number of reads and bases before pre-processing"""
     input:
-        r1=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
-        r2=out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
+        r1="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
+        r2="{sample}/pre_process/{sample}.merged_R2.fastq.gz"
     params:
         fastqcount=fqc
     output:
-        out_path("{sample}/pre_process/{sample}.preqc_count.json")
+        "{sample}/pre_process/{sample}.preqc_count.json"
     shell: "{params.fastqcount} {input.r1} {input.r2} > {output}"
 
 rule fqcount_postqc:
     """Calculate number of reads and bases after pre-processing"""
     input:
-        r1=out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
-        r2=out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
+        r1="{sample}/pre_process/{sample}.cutadapt_R1.fastq",
+        r2="{sample}/pre_process/{sample}.cutadapt_R2.fastq"
     params:
         fastqcount=fqc
     output:
-        out_path("{sample}/pre_process/{sample}.postqc_count.json")
+        "{sample}/pre_process/{sample}.postqc_count.json"
     shell: "{params.fastqcount} {input.r1} {input.r2} > {output}"
@@ -560,14 +558,14 @@ rule fqcount_postqc:
 rule fastqc_stats:
     """Collect fastq stats for a sample in json format"""
     input:
-        preqc_r1=out_path("{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip"),
-        preqc_r2=out_path("{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip"),
-        postqc_r1=out_path("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip"),
-        postqc_r2=out_path("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip"),
+        preqc_r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
+        preqc_r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip",
+        postqc_r1="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
+        postqc_r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip",
         sc=fqpy
     conda: "envs/collectstats.yml"
     output:
-        out_path("{sample}/pre_process/fastq_stats.json")
+        "{sample}/pre_process/fastq_stats.json"
     shell: "python {input.sc} --preqc-r1 {input.preqc_r1} "
            "--preqc-r2 {input.preqc_r2} "
            "--postqc-r1 {input.postqc_r1} "
@@ -578,15 +576,15 @@ rule fastqc_stats:
 rule covstats:
     """Calculate coverage statistics on bam file"""
     input:
-        bam=out_path("{sample}/bams/{sample}.markdup.bam"),
-        genome=out_path("current.genome"),
+        bam="{sample}/bams/{sample}.markdup.bam",
+        genome="current.genome",
         covpy=covpy,
         bed=get_bedpath
     params:
         subt="Sample {sample}"
     output:
-        covj=out_path("{sample}/coverage/{bed}.covstats.json"),
-        covp=out_path("{sample}/coverage/{bed}.covstats.png")
+        covj="{sample}/coverage/{bed}.covstats.json",
+        covp="{sample}/coverage/{bed}.covstats.png"
     conda: "envs/covstat.yml"
     shell: "bedtools coverage -sorted -g {input.genome} -a {input.bed} -b {input.bam} "
            "-d | python {input.covpy} - --plot {output.covp} "
@@ -596,10 +594,10 @@ rule covstats:
 rule vtools_coverage:
     """Calculate coverage statistics per transcript"""
     input:
-        gvcf=out_path("{sample}/vcf/{sample}.g.vcf.gz"),
+        gvcf="{sample}/vcf/{sample}.g.vcf.gz",
         ref=get_refflatpath
     output:
-        tsv=out_path("{sample}/coverage/{ref}.coverages.tsv")
+        tsv="{sample}/coverage/{ref}.coverages.tsv"
     conda: "envs/vcfstats.yml"
     shell: "vtools-gcoverage -I {input.gvcf} -R {input.ref} > {output.tsv}"
@@ -609,9 +607,9 @@ rule vtools_coverage:
 rule vcfstats:
     """Calculate vcf statistics"""
     input:
-        vcf=out_path("multisample/genotyped.vcf.gz")
+        vcf="multisample/genotyped.vcf.gz"
     output:
-        stats=out_path("multisample/vcfstats.json")
+        stats="multisample/vcfstats.json"
     conda: "envs/vcfstats.yml"
     shell: "vtools-stats -i {input.vcf} > {output.stats}"
@@ -621,21 +619,21 @@ if len(BASE_BEDS) >= 1:
     rule collectstats:
         """Collect all stats for a particular sample with beds"""
         input:
-            preqc=out_path("{sample}/pre_process/{sample}.preqc_count.json"),
-            postq=out_path("{sample}/pre_process/{sample}.postqc_count.json"),
-            mnum=out_path("{sample}/bams/{sample}.mapped.num"),
-            mbnum=out_path("{sample}/bams/{sample}.mapped.basenum"),
-            unum=out_path("{sample}/bams/{sample}.unique.num"),
-            ubnum=out_path("{sample}/bams/{sample}.usable.basenum"),
-            fastqc=out_path("{sample}/pre_process/fastq_stats.json"),
-            cov=expand(out_path("{{sample}}/coverage/{bed}.covstats.json"),
+            preqc="{sample}/pre_process/{sample}.preqc_count.json",
+            postq="{sample}/pre_process/{sample}.postqc_count.json",
+            mnum="{sample}/bams/{sample}.mapped.num",
+            mbnum="{sample}/bams/{sample}.mapped.basenum",
+            unum="{sample}/bams/{sample}.unique.num",
+            ubnum="{sample}/bams/{sample}.usable.basenum",
+            fastqc="{sample}/pre_process/fastq_stats.json",
+            cov=expand("{{sample}}/coverage/{bed}.covstats.json",
                        bed=BASE_BEDS),
             colpy=colpy
         params:
             sample_name="{sample}",
            fthresh=FEMALE_THRESHOLD
        output:
-            out_path("{sample}/{sample}.stats.json")
+            "{sample}/{sample}.stats.json"
         conda: "envs/collectstats.yml"
         shell: "python {input.colpy} --sample-name {params.sample_name} "
               "--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
@@ -647,19 +645,19 @@ else:
     rule collectstats:
         """Collect all stats for a particular sample without beds"""
         input:
-            preqc = out_path("{sample}/pre_process/{sample}.preqc_count.json"),
-            postq = out_path("{sample}/pre_process/{sample}.postqc_count.json"),
-            mnum = out_path("{sample}/bams/{sample}.mapped.num"),
-            mbnum = out_path("{sample}/bams/{sample}.mapped.basenum"),
-            unum = out_path("{sample}/bams/{sample}.unique.num"),
-            ubnum = out_path("{sample}/bams/{sample}.usable.basenum"),
-            fastqc=out_path("{sample}/pre_process/fastq_stats.json"),
+            preqc = "{sample}/pre_process/{sample}.preqc_count.json",
+            postq = "{sample}/pre_process/{sample}.postqc_count.json",
+            mnum = "{sample}/bams/{sample}.mapped.num",
+            mbnum = "{sample}/bams/{sample}.mapped.basenum",
+            unum = "{sample}/bams/{sample}.unique.num",
+            ubnum = "{sample}/bams/{sample}.usable.basenum",
+            fastqc="{sample}/pre_process/fastq_stats.json",
             colpy = colpy
         params:
             sample_name = "{sample}",
             fthresh = FEMALE_THRESHOLD
         output:
-            out_path("{sample}/{sample}.stats.json")
+            "{sample}/{sample}.stats.json"
         conda: "envs/collectstats.yml"
         shell: "python {input.colpy} --sample-name {params.sample_name} "
               "--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
@@ -671,11 +669,11 @@ else:
 rule merge_stats:
     """Merge all stats of all samples"""
     input:
-        cols=expand(out_path("{sample}/{sample}.stats.json"), sample=SAMPLES),
-        vstat=out_path("multisample/vcfstats.json"),
+        cols=expand("{sample}/{sample}.stats.json", sample=SAMPLES),
+        vstat="multisample/vcfstats.json",
         mpy=mpy
     output:
-        stats=out_path("stats.json")
+        stats="stats.json"
     conda: "envs/collectstats.yml"
     shell: "python {input.mpy} --vcfstats {input.vstat} {input.cols} > {output.stats}"
@@ -683,10 +681,10 @@ rule merge_stats:
 rule stats_tsv:
     """Convert stats.json to tsv"""
     input:
-        stats=out_path("stats.json"),
+        stats="stats.json",
         sc=tsvpy
     output:
-        stats=out_path("stats.tsv")
+        stats="stats.tsv"
     conda: "envs/collectstats.yml"
     shell: "python {input.sc} -i {input.stats} > {output.stats}"
@@ -697,11 +695,11 @@ rule multiqc:
     Depends on stats.tsv so that it is forced to run at the end of the pipeline
     """
     input:
-        stats=out_path("stats.tsv")
+        stats="stats.tsv"
     params:
-        odir=out_path("."),
-        rdir=out_path("multiqc_report")
+        odir=".",
+        rdir="multiqc_report"
     output:
-        report=out_path("multiqc_report/multiqc_report.html")
+        report="multiqc_report/multiqc_report.html"
     conda: "envs/multiqc.yml"
     shell: "multiqc -f -o {params.rdir} {params.odir} || touch {output.report}"