diff --git a/Snakefile b/Snakefile
index ebd1e63d58ce936d318ba6d3bb38051ce0c9e9bf..b7ce2cb2dfc0387beaee577527820d8c4cca4116 100644
--- a/Snakefile
+++ b/Snakefile
@@ -131,22 +131,26 @@ rule all:
         stats=metrics()
 
 rule genome:
+    """Create genome file as used by bedtools"""
     input: REFERENCE
     output: out_path("current.genome")
     shell: "awk -v OFS='\t' {{'print $1,$2'}} {input}.fai > {output}"
 
 rule merge_r1:
+    """Merge all forward fastq files into one"""
     input: get_r1
     output: temp(out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"))
     shell: "cat {input} > {output}"
 
 rule merge_r2:
+    """Merge all reverse fastq files into one"""
     input: get_r2
     output: temp(out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz"))
     shell: "cat {input} > {output}"
 
 
 rule seqtk_r1:
+    """Either subsample or link forward fastq file"""
     input:
         stats=out_path("{sample}/pre_process/{sample}.preqc_count.json"),
         fastq=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz")
@@ -159,6 +163,7 @@ rule seqtk_r1:
 
 
 rule seqtk_r2:
+    """Either subsample or link reverse fastq file"""
     input:
         stats = out_path("{sample}/pre_process/{sample}.preqc_count.json"),
         fastq = out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
@@ -172,6 +177,7 @@ rule seqtk_r2:
 
 # contains original merged fastq files as input to prevent them from being prematurely deleted
 rule sickle:
+    """Trim fastq files"""
     input:
         r1 = out_path("{sample}/pre_process/{sample}.sampled_R1.fastq.gz"),
         r2 = out_path("{sample}/pre_process/{sample}.sampled_R2.fastq.gz"),
@@ -186,6 +192,7 @@ rule sickle:
            "-p {output.r2} -s {output.s}"
 
 rule cutadapt:
+    """Clip fastq files"""
     input:
         r1 = out_path("{sample}/pre_process/{sample}.trimmed_R1.fastq"),
         r2 = out_path("{sample}/pre_process/{sample}.trimmed_R2.fastq")
@@ -197,6 +204,7 @@ rule cutadapt:
            "{input.r1} -p {output.r2} {input.r2}"
 
 rule align:
+    """Align fastq files"""
     input:
         r1 = out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
         r2 = out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq"),
@@ -210,6 +218,7 @@ rule align:
            "INPUT=/dev/stdin OUTPUT={output} SORT_ORDER=coordinate"
 
 rule markdup:
+    """Mark duplicates in BAM file"""
     input:
         bam = out_path("{sample}/bams/{sample}.sorted.bam"),
     params:
@@ -224,6 +233,7 @@ rule markdup:
            "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=500"
 
 rule baserecal:
+    """Base recalibrated BAM files"""
     input:
         bam = out_path("{sample}/bams/{sample}.markdup.bam"),
         java = JAVA,
@@ -243,6 +253,7 @@ rule baserecal:
            "-knownSites {input.hapmap}"
 
 rule gvcf_scatter:
+    """Run HaplotypeCaller in GVCF mode by chunk"""
     input:
         bam=out_path("{sample}/bams/{sample}.markdup.bam"),
         bqsr=out_path("{sample}/bams/{sample}.baserecal.grp"),
@@ -262,6 +273,7 @@ rule gvcf_scatter:
 
 
 rule gvcf_gather:
+    """Gather all gvcf scatters"""
     input:
         gvcfs=expand(out_path("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz"),
                      chunk=CHUNKS),
@@ -279,6 +291,7 @@ rule gvcf_gather:
 
 
 rule genotype_scatter:
+    """Run GATK's GenotypeGVCFs by chunk"""
     input:
         gvcfs = expand(out_path("{sample}/vcf/{sample}.g.vcf.gz"),
                        sample=SAMPLES),
@@ -296,6 +309,7 @@ rule genotype_scatter:
 
 
 rule genotype_gather:
+    """Gather all genotyping scatters"""
     input:
         vcfs=expand(out_path("multisample/genotype.{chunk}.part.vcf.gz"),
                     chunk=CHUNKS),
@@ -315,6 +329,7 @@ rule genotype_gather:
 ## bam metrics
 
 rule mapped_num:
+    """Calculate number of mapped reads"""
     input:
         bam=out_path("{sample}/bams/{sample}.sorted.bam")
     output:
@@ -324,6 +339,7 @@ rule mapped_num:
 
 
 rule mapped_basenum:
+    """Calculate number of mapped bases"""
     input:
         bam=out_path("{sample}/bams/{sample}.sorted.bam")
     output:
@@ -333,6 +349,7 @@ rule mapped_basenum:
 
 
 rule unique_num:
+    """Calculate number of unique reads"""
     input:
         bam=out_path("{sample}/bams/{sample}.markdup.bam")
     output:
@@ -342,6 +359,7 @@ rule unique_num:
 
 
 rule usable_basenum:
+    """Calculate number of bases on unique reads"""
     input:
         bam=out_path("{sample}/bams/{sample}.markdup.bam")
     output:
@@ -353,6 +371,7 @@ rule usable_basenum:
 ## fastqc
 
 rule fastqc_raw:
+    """Run fastqc on raw fastq files"""
     input:
         r1=get_r1,
         r2=get_r2
@@ -365,6 +384,7 @@ rule fastqc_raw:
 
 
 rule fastqc_merged:
+    """Run fastqc on merged fastq files"""
     input:
         r1=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
         r2=out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
@@ -377,6 +397,7 @@ rule fastqc_merged:
 
 
 rule fastqc_postqc:
+    """Run fastqc on fastq files post pre-processing"""
     input:
         r1=out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
         r2=out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
@@ -391,6 +412,7 @@ rule fastqc_postqc:
 ## fastq-count
 
 rule fqcount_preqc:
+    """Calculate number of reads and bases before pre-processing"""
     input:
         r1=out_path("{sample}/pre_process/{sample}.merged_R1.fastq.gz"),
         r2=out_path("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
@@ -402,6 +424,7 @@ rule fqcount_preqc:
 
 
 rule fqcount_postqc:
+    """Calculate number of reads and bases after pre-processing"""
     input:
         r1=out_path("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
         r2=out_path("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
@@ -415,6 +438,7 @@ rule fqcount_postqc:
 ## coverages
 
 rule covstats:
+    """Calculate coverage statistics on bam file"""
     input:
         bam=out_path("{sample}/bams/{sample}.markdup.bam"),
         genome=out_path("current.genome"),
@@ -434,6 +458,7 @@ rule covstats:
 ## vcfstats
 
 rule vcfstats:
+    """Calculate vcf statistics"""
     input:
         vcf=out_path("multisample/genotyped.vcf.gz"),
         vs_py=vs_py
@@ -445,6 +470,7 @@ rule vcfstats:
 
 ## collection
 rule collectstats:
+    """Collect all stats for a particular sample"""
     input:
         preqc=out_path("{sample}/pre_process/{sample}.preqc_count.json"),
         postq=out_path("{sample}/pre_process/{sample}.postqc_count.json"),
@@ -467,6 +493,7 @@ rule collectstats:
            "--female-threshold {params.fthresh} {input.cov} > {output}"
 
 rule merge_stats:
+    """Merge all stats of all samples"""
     input:
         cols=expand(out_path("{sample}/{sample}.stats.json"), sample=SAMPLES),
         vstat=out_path("multisample/vcfstats.json"),