Snakefile 4.41 KB
Newer Older
JihedC's avatar
JihedC committed
1
# Snakemake pipeline for the analysis of ChIP_seq PE and SE
Chouaref's avatar
Chouaref committed
2
3


JihedC's avatar
JihedC committed
4
5
################## Import libraries ##################

Chouaref's avatar
Chouaref committed
6
import pandas as pd
JihedC's avatar
JihedC committed
7
8
9
10
11
import os
import sys
from subprocess import call
import itertools
from snakemake.utils import R
Chouaref's avatar
Chouaref committed
12
13


JihedC's avatar
JihedC committed
14
################## Configuration file ##################
Chouaref's avatar
Chouaref committed
15

JihedC's avatar
JihedC committed
16
17
18
19
configfile: "config.yaml"
WORKING_DIR =   config["working_dir"]
RESULT_DIR  =   config["result_dir"]
annotation  =   config["annotation"]
Chouaref's avatar
Chouaref committed
20

JihedC's avatar
JihedC committed
21
################## Configuration file ##################
Chouaref's avatar
Chouaref committed
22

JihedC's avatar
JihedC committed
23
24
25
26
27
# read the tab separated table containing columns: sample, fq1, fq2 and condition
units = pd.read_table(config["units"], dtype=str).set_index(["sample"], drop=False)
SAMPLES = units.index.get_level_values('sample').unique().tolist()
samples = pd.read_csv(config["units"], dtype=str,index_col=0,sep="\t")
samplefile = config["units"]
Chouaref's avatar
Chouaref committed
28

JihedC's avatar
JihedC committed
29
################## Helper functions ##################
Chouaref's avatar
Chouaref committed
30

JihedC's avatar
JihedC committed
31
32
33
34
35
36
def sample_is_single_end(sample):
    """This function detect missing value in the column 2 of the units.tsv"""
    if "fq2" not in samples.columns:
        return True
    else:
        return pd.isnull(samples.loc[(sample), "fq2"])
Chouaref's avatar
Chouaref committed
37

JihedC's avatar
JihedC committed
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def get_fastq(wildcards):
    """This function checks if the sample has paired end or single end reads and returns 1 or 2 names of the fastq files"""
    if sample_is_single_end(wildcards.sample):
        return samples.loc[(wildcards.sample), ["fq1"]].dropna()
    else:
        return samples.loc[(wildcards.sample), ["fq1", "fq2"]].dropna()

def get_trim_names(wildcards):
    """
    This function:
      1. Checks if the sample is paired end or single end
      2. Returns the correct input and output trimmed file names. 
    """
    if sample_is_single_end(wildcards.sample):
        inFile = samples.loc[(wildcards.sample), ["fq1"]].dropna()
        return "--in1 " + inFile[0] + " --out1 " + WORKING_DIR + "trimmed/" + wildcards.sample + "_R1_trimmed.fq.gz" 
    else:
        inFile = samples.loc[(wildcards.sample), ["fq1", "fq2"]].dropna()
        return "--in1 " + inFile[0] + " --in2 " + inFile[1] + " --out1 " + WORKING_DIR + "trimmed/" + wildcards.sample + "_R1_trimmed.fq.gz --out2 "  + WORKING_DIR + "trimmed/" + wildcards.sample + "_R2_trimmed.fq.gz"

def get_star_names(wildcards):
    """
    This function:
      1. Checks if the sample is paired end or single end.
      2. Returns the correct input file names for STAR mapping step.
    """
    if sample_is_single_end(wildcards.sample):
        return WORKING_DIR + "trimmed/" + wildcards.sample + "_R1_trimmed.fq.gz"     
    else:
        return WORKING_DIR + "trimmed/" + wildcards.sample + "_R1_trimmed.fq.gz " + WORKING_DIR + "trimmed/" + wildcards.sample + "_R2_trimmed.fq.gz"
Chouaref's avatar
Chouaref committed
68

JihedC's avatar
JihedC committed
69
70
71
72
73
74
75

def get_samples_per_treatment(input_df="units.tsv",colsamples="sample",coltreatment="condition",treatment="control"):
    """This function returns a list of samples that correspond to the same experimental condition"""
    df = pd.read_table(input_df)
    df = df.loc[df[coltreatment] == treatment]
    filtered_samples = df[colsamples].tolist()
    return filtered_samples
Chouaref's avatar
Chouaref committed
76
77
78

CASES = get_samples_per_treatment(treatment="treatment")
CONTROLS = get_samples_per_treatment(treatment="control")
JihedC's avatar
JihedC committed
79
################## Wilcards constrains  ##################
Chouaref's avatar
Chouaref committed
80
81
82
83
84

wildcard_constraints:
    sample = "[A-Za-z0-9]+"

wildcard_constraints:
JihedC's avatar
JihedC committed
85
    unit = "[A-Za-z0-9]+"
Chouaref's avatar
Chouaref committed
86

JihedC's avatar
JihedC committed
87
################## DESIRED OUTPUT ##################
Chouaref's avatar
Chouaref committed
88
89
90
##############
# Desired output
##############
JihedC's avatar
JihedC committed
91
92
FASTP               =     expand(WORKING_DIR + "trimmed/" + "{sample}_{read}_trimmed.fq.gz", sample=SAMPLES, read={"R1", "R2"})
BOWTIE2             =     expand(WORKING_DIR + "mapped/{sample}.bam", sample= SAMPLES)
JihedC's avatar
JihedC committed
93
94
FASTQC              =     expand(RESULT_DIR + "fastqc/{sample}.fastqc.html", sample=SAMPLES)
BIGWIG              =     expand(RESULT_DIR + "bigwig/{sample}_rpkm.bw", sample=SAMPLES)
JihedC's avatar
JihedC committed
95
MACS2               =     expand(RESULT_DIR + "macs2/{treatment}_vs_{control}_peaks.narrowPeak", treatment = CASES, control = CONTROLS)  
Chouaref's avatar
Chouaref committed
96
97
98
99
100
101

###############
# Final output
################
rule all:
    input:
JihedC's avatar
JihedC committed
102
103
        FASTP,
        FASTQC,
JihedC's avatar
JihedC committed
104
        BOWTIE2,
JihedC's avatar
JihedC committed
105
106
107
        BIGWIG,
        MACS2
        
Chouaref's avatar
Chouaref committed
108
109
    message: "ChIP-seq SE pipeline succesfully run."		#finger crossed to see this message!

JihedC's avatar
JihedC committed
110
111
112
113
114
    shell:"""
    cp units.tsv RESULT_DIR 
    cp config.yaml RESULT_DIR
    #rm -rf {WORKING_DIR}
    """
Chouaref's avatar
Chouaref committed
115
116
117
118
119
120
121

###############
# Rules
###############

include : "rules/external_data.smk"
include : 'rules/pre_processing.smk'
JihedC's avatar
JihedC committed
122
include : "rules/peak_calling.smk"
Chouaref's avatar
Chouaref committed
123
124
include : "rules/deeptools_post_processing.smk"