# Snakefile
################## Import libraries ##################

import pandas as pd
import os
import sys
from subprocess import call
import itertools
from snakemake.utils import R


################## Configuration file and PATHS##################

# Load the workflow settings; every key read below must exist in config.yaml.
configfile: "config.yaml"

# Directory prefixes. WORKING_DIR and RESULT_DIR are joined to relative paths
# with plain "+" further down in this file, so the configured values are
# expected to end with a path separator.
WORKING_DIR         = config["working_dir"]
RESULT_DIR          = config["results_dir"]
DATA_DIR            = config["data_dir"]

# Reference URLs taken from the config (not used directly in this part of
# the file; presumably consumed by the included rule files -- confirm).
GENOME_FASTA_URL    = config["genome_fasta_url"]
GENE_GTF_URL        = config["gene_gtf_url"]

# Disabled alternative: index the units table by its "bed" column instead.
#units = pd.read_table(config["units"], dtype=str).set_index(["bed"], drop=False)
#BED = units.index.get_level_values('bed').unique().tolist()

# Sample sheet: the table at config["units"], with every column read as a
# string and indexed by "sample". drop=False keeps "sample" available as a
# regular column as well.
units = pd.read_table(config["units"], dtype=str).set_index(["sample"], drop=False)

# Unique sample names, in order of first appearance in the sample sheet.
SAMPLES = units.index.get_level_values('sample').unique().tolist()

###############
# Helper Functions
###############
def get_fastq(wildcards):
    """Return the FASTQ entries registered for one sample.

    Args:
        wildcards: Snakemake wildcards object; only its ``samples``
            attribute is read.

    Returns:
        The ``fq1`` and ``fq2`` values selected from the module-level
        ``units`` table for ``wildcards.samples``, with missing values
        dropped by ``dropna()``.

    Raises:
        KeyError: If ``wildcards.samples`` is not in the index of ``units``.
    """
    return units.loc[wildcards.samples, ["fq1", "fq2"]].dropna()


##############
# Wildcards
##############
# Restrict the `sample` and `unit` wildcards to alphanumeric values.
# NOTE(review): the target paths in this file use a wildcard named `samples`
# (plural), which these constraints do not cover -- confirm against the
# included rule files whether a `samples` constraint is wanted.
wildcard_constraints:
    sample = "[A-Za-z0-9]+",
    unit = "[A-Za-z0-9]+"


################## DESIRED OUTPUT ##################
# Here we define the outputs of the rules we want the pipeline to produce.
# The variables defined here are then used in `rule all`.
# The FastQC rule needs to be modified to accept any sample; its targets are
# currently hard-coded.
# HMMRATAC will not work with the test dataset.
#
# Each name is bound directly to the list returned by expand() (or to a plain
# path string). No trailing commas: a trailing comma would wrap the value in
# a one-element tuple.

FASTQC              = expand(RESULT_DIR  + "fastqc/sample_{numbers}_{R}_fastqc.html", numbers=['8', '12', '4_3', '4_1'], R=['R1', 'R2'])
FORWARD_READS       = expand(WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", samples=SAMPLES)
REVERSE_READS       = expand(WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", samples=SAMPLES)
TRIMMED_FASTQC      = expand(RESULT_DIR  + "trimmed_fastqc/{samples}_{direction}_fastqc.html", samples=SAMPLES, direction=['forward', 'reverse'])
MAPPED              = expand(WORKING_DIR + "mapped/{samples}.bam", samples=SAMPLES)
# NOTE(review): range(1,2) yields only i = 1, so just "<sample>.fq.1.gz" is
# requested. If the mate-2 file is also a wanted target this should be
# range(1,3) -- confirm against the mapping rule.
UNMAPPED            = expand([WORKING_DIR + "unmapped/{samples}.fq." + str(i) + ".gz" for i in range(1,2)], samples=SAMPLES)
MAP_SORTED          = expand(WORKING_DIR + "sort/{samples}.sorted.bam", samples=SAMPLES)
DEDUP               = expand(WORKING_DIR + "dedup/{samples}.dedup.bam", samples=SAMPLES)
STATS               = expand(WORKING_DIR + "dedup/{samples}.dedup.stats", samples=SAMPLES)
SORTED_INDEXED      = expand(WORKING_DIR + "sort/{samples}.sorted.bam.bai", samples=SAMPLES)
GENOME_INFO         = expand(WORKING_DIR + "genome_info/{samples}.genome.info", samples=SAMPLES)
HMMRATAC            = expand(RESULT_DIR  + "hmmratac/{samples}_peaks.gappedPeak", samples=SAMPLES)
NAME_LOG            = expand(RESULT_DIR  + "peaks/{samples}.log", samples=SAMPLES)
NARROWPEAK          = expand(RESULT_DIR  + "macs2/{samples}_peaks.narrowPeak", samples=SAMPLES)
COVERAGE_TRACK      = expand(RESULT_DIR  + "bamCoverage/{samples}.bw", samples=SAMPLES)
BIGWIGSUMMARY       = RESULT_DIR + "bigwigsummary/multiBigwigSummary.npz"
PCAPLOT             = RESULT_DIR + "PCA/PCA_PLOT.pdf"
HEATMAP             = RESULT_DIR + "heatmap/heatmap_reference_point_genes.pdf"


# Workflow-wide container image. Per the Snakemake documentation it is only
# used when container execution is enabled on the command line.
container: "docker://continuumio/miniconda3:4.4.10"

# Target rule: requesting these inputs makes Snakemake build every listed
# output. Entries that are commented out are currently disabled targets.
rule all:
    input:
        FASTQC,
        FORWARD_READS,
        REVERSE_READS,
        TRIMMED_FASTQC,
        MAPPED,
        UNMAPPED,
        MAP_SORTED,
        #DEDUP,
        #STATS,
        #COVERAGE_TRACK,
        #BIGWIGSUMMARY,
        #PCAPLOT,
        #SORTED_INDEXED,
        #NARROWPEAK,
        #HMMRATAC,
        HEATMAP
    message : "Analysis is complete!"
    log:
        "log/rule_all.log"
    # Empty command: the rule itself does no work beyond collecting inputs.
    shell:""

################## INCLUDE RULES ##################
# Rule definitions are split across the files below.
include: "rules/external_data.smk"
include: "rules/pre-processing.smk"
include: "rules/macs2.smk"
include: "rules/deeptools.smk"
include: "rules/hmmratac.smk"