Skip to content
Snippets Groups Projects
Commit 5e129da5 authored by Sander Bollen's avatar Sander Bollen
Browse files

all rules with docstrings

parent bf0c7752
No related branches found
No related tags found
1 merge request!2Review comments
......@@ -131,22 +131,26 @@ rule all:
stats=metrics()
rule genome:
"""Create genome file as used by bedtools"""
input: REFERENCE
output: out_path("current.genome")
shell: "awk -v OFS='\t' {{'print $1,$2'}} {input}.fai > {output}"
rule merge_r1:
"""Merge all forward fastq files into one"""
input: get_r1
output: temp(out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"))
shell: "cat {input} > {output}"
rule merge_r2:
"""Merge all reverse fastq files into one"""
input: get_r2
output: temp(out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz"))
shell: "cat {input} > {output}"
rule seqtk_r1:
"""Either subsample or link forward fastq file"""
input:
stats=out_path("{sample}/pre_process/{sample}.preqc_count.json"),
fastq=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz")
......@@ -159,6 +163,7 @@ rule seqtk_r1:
rule seqtk_r2:
"""Either subsample or link reverse fastq file"""
input:
stats = out_path("{sample}/pre_process/{sample}.preqc_count.json"),
fastq = out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
......@@ -172,6 +177,7 @@ rule seqtk_r2:
# contains original merged fastq files as input to prevent them from being prematurely deleted
rule sickle:
"""Trim fastq files"""
input:
r1 = out_path("{sample}/pre_process/{sample}.sampled_R1.fastq.gz"),
r2 = out_path("{sample}/pre_process/{sample}.sampled_R2.fastq.gz"),
......@@ -186,6 +192,7 @@ rule sickle:
"-p {output.r2} -s {output.s}"
rule cutadapt:
"""Clip fastq files"""
input:
r1 = out_path("{sample}/pre_process/{sample}.trimmed_R1.fastq"),
r2 = out_path("{sample}/pre_process/{sample}.trimmed_R2.fastq")
......@@ -197,6 +204,7 @@ rule cutadapt:
"{input.r1} -p {output.r2} {input.r2}"
rule align:
"""Align fastq files"""
input:
r1 = out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
r2 = out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq"),
......@@ -210,6 +218,7 @@ rule align:
"INPUT=/dev/stdin OUTPUT={output} SORT_ORDER=coordinate"
rule markdup:
"""Mark duplicates in BAM file"""
input:
bam = out_path("{sample}/bams/{sample}.sorted.bam"),
params:
......@@ -224,6 +233,7 @@ rule markdup:
"MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=500"
rule baserecal:
"""Base recalibrated BAM files"""
input:
bam = out_path("{sample}/bams/{sample}.markdup.bam"),
java = JAVA,
......@@ -243,6 +253,7 @@ rule baserecal:
"-knownSites {input.hapmap}"
rule gvcf_scatter:
"""Run HaplotypeCaller in GVCF mode by chunk"""
input:
bam=out_path("{sample}/bams/{sample}.markdup.bam"),
bqsr=out_path("{sample}/bams/{sample}.baserecal.grp"),
......@@ -262,6 +273,7 @@ rule gvcf_scatter:
rule gvcf_gather:
"""Gather all gvcf scatters"""
input:
gvcfs=expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"),
chunk=CHUNKS),
......@@ -279,6 +291,7 @@ rule gvcf_gather:
rule genotype_scatter:
"""Run GATK's GenotypeGVCFs by chunk"""
input:
gvcfs = expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"),
sample=SAMPLES),
......@@ -296,6 +309,7 @@ rule genotype_scatter:
rule genotype_gather:
"""Gather all genotyping scatters"""
input:
vcfs=expand(out_path("multisample/genotype.{chunk}.part.vcf.gz"),
chunk=CHUNKS),
......@@ -315,6 +329,7 @@ rule genotype_gather:
## bam metrics
rule mapped_num:
"""Calculate number of mapped reads"""
input:
bam=out_path("{sample}/bams/{sample}.sorted.bam")
output:
......@@ -324,6 +339,7 @@ rule mapped_num:
rule mapped_basenum:
"""Calculate number of mapped bases"""
input:
bam=out_path("{sample}/bams/{sample}.sorted.bam")
output:
......@@ -333,6 +349,7 @@ rule mapped_basenum:
rule unique_num:
"""Calculate number of unique reads"""
input:
bam=out_path("{sample}/bams/{sample}.markdup.bam")
output:
......@@ -342,6 +359,7 @@ rule unique_num:
rule usable_basenum:
"""Calculate number of bases on unique reads"""
input:
bam=out_path("{sample}/bams/{sample}.markdup.bam")
output:
......@@ -353,6 +371,7 @@ rule usable_basenum:
## fastqc
rule fastqc_raw:
"""Run fastqc on raw fastq files"""
input:
r1=get_r1,
r2=get_r2
......@@ -365,6 +384,7 @@ rule fastqc_raw:
rule fastqc_merged:
"""Run fastqc on merged fastq files"""
input:
r1=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
r2=out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
......@@ -377,6 +397,7 @@ rule fastqc_merged:
rule fastqc_postqc:
"""Run fastqc on fastq files post pre-processing"""
input:
r1=out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
r2=out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
......@@ -391,6 +412,7 @@ rule fastqc_postqc:
## fastq-count
rule fqcount_preqc:
"""Calculate number of reads and bases before pre-processing"""
input:
r1=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
r2=out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
......@@ -402,6 +424,7 @@ rule fqcount_preqc:
rule fqcount_postqc:
"""Calculate number of reads and bases after pre-processing"""
input:
r1=out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
r2=out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
......@@ -415,6 +438,7 @@ rule fqcount_postqc:
## coverages
rule covstats:
"""Calculate coverage statistics on bam file"""
input:
bam=out_path("{sample}/bams/{sample}.markdup.bam"),
genome=out_path("current.genome"),
......@@ -434,6 +458,7 @@ rule covstats:
## vcfstats
rule vcfstats:
"""Calculate vcf statistics"""
input:
vcf=out_path("multisample/genotyped.vcf.gz"),
vs_py=vs_py
......@@ -445,6 +470,7 @@ rule vcfstats:
## collection
rule collectstats:
"""Collect all stats for a particular sample"""
input:
preqc=out_path("{sample}/pre_process/{sample}.preqc_count.json"),
postq=out_path("{sample}/pre_process/{sample}.postqc_count.json"),
......@@ -467,6 +493,7 @@ rule collectstats:
"--female-threshold {params.fthresh} {input.cov} > {output}"
rule merge_stats:
"""Merge all stats of all samples"""
input:
cols=expand(out_path("{sample}/{sample}.stats.json"), sample=SAMPLES),
vstat=out_path("multisample/vcfstats.json"),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment