Unverified Commit bab1a3b4 authored by Jihed Chouaref's avatar Jihed Chouaref Committed by GitHub
Browse files

Update and rename snakefile to Snakefile

parent d3011c41
......@@ -42,24 +42,27 @@ wildcard_constraints:
################## DESIRED OUTPUT ##################
FASTQC = expand(RESULT_DIR + "fastqc/sample_{numbers}_{R}_fastqc.html",numbers = ['8','12','4_3','4_1'], R=['R1', 'R2']),
FORWARD_READS = expand(WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", samples = SAMPLES),
REVERSE_READS = expand(WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", samples = SAMPLES),
TRIMMED_FASTQC = expand(RESULT_DIR + "trimmed_fastqc/{samples}_{direction}_fastqc.html", samples = SAMPLES, direction=['forward', 'reverse']),
MAPPED = expand(WORKING_DIR + "mapped/{samples}.bam", samples = SAMPLES),
UNMAPPED = expand([WORKING_DIR + "unmapped/{samples}.fq." + str(i) +".gz" for i in range(1,2)], samples = SAMPLES),
MAP_SORTED = expand(WORKING_DIR + "sort/{samples}.sorted.bam", samples = SAMPLES),
DEDUP = expand(WORKING_DIR + "dedup/{samples}.dedup.bam", samples = SAMPLES),
STATS = expand(WORKING_DIR + "dedup/{samples}.dedup.stats", samples = SAMPLES),
SORTED_INDEXED = expand(WORKING_DIR + "index/{samples}.sorted.bam.bai", samples = SAMPLES),
GENOME_INFO = expand(WORKING_DIR + "genome_info/{samples}.genome.info", samples = SAMPLES),
GAPPED_PEAK = expand(RESULT_DIR + "peaks/{samples}_peaks.gappedPeak", samples = SAMPLES),
NAME_LOG = expand(RESULT_DIR + "peaks/{samples}.log", samples = SAMPLES),
NARROWPEAK = expand(RESULT_DIR + "macs2/{samples}_peaks.narrowPeak", samples = SAMPLES),
COVERAGE_TRACK = expand(WORKING_DIR + "bamCoverage/{samples}.bw", samples = SAMPLES),
BIGWIGSUMMARY = expand(WORKING_DIR + "bigwigsummary/{samples}.npz", samples = SAMPLES),
PCAPLOT = expand(RESULT_DIR + "PCA/{samples}_pca.png", samples = SAMPLES)
# Here we define the outputs of rules we want the pipeline to produce.
# The varialble defined here is then used in the `rule all`.
# Fastqc rule need to be modified to accept any sample
FASTQC = expand(RESULT_DIR + "fastqc/sample_{numbers}_{R}_fastqc.html",numbers = ['8','12','4_3','4_1'], R=['R1', 'R2']),
FORWARD_READS = expand(WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", samples = SAMPLES),
REVERSE_READS = expand(WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", samples = SAMPLES),
TRIMMED_FASTQC = expand(RESULT_DIR + "trimmed_fastqc/{samples}_{direction}_fastqc.html", samples = SAMPLES, direction=['forward', 'reverse']),
MAPPED = expand(WORKING_DIR + "mapped/{samples}.bam", samples = SAMPLES),
UNMAPPED = expand([WORKING_DIR + "unmapped/{samples}.fq." + str(i) +".gz" for i in range(1,2)], samples = SAMPLES),
MAP_SORTED = expand(WORKING_DIR + "sort/{samples}.sorted.bam", samples = SAMPLES),
DEDUP = expand(WORKING_DIR + "dedup/{samples}.dedup.bam", samples = SAMPLES),
STATS = expand(WORKING_DIR + "dedup/{samples}.dedup.stats", samples = SAMPLES),
SORTED_INDEXED = expand(WORKING_DIR + "sort/{samples}.sorted.bam.bai", samples = SAMPLES),
GENOME_INFO = expand(WORKING_DIR + "genome_info/{samples}.genome.info", samples = SAMPLES),
GAPPED_PEAK = expand(RESULT_DIR + "peaks/{samples}_peaks.gappedPeak", samples = SAMPLES),
NAME_LOG = expand(RESULT_DIR + "peaks/{samples}.log", samples = SAMPLES),
NARROWPEAK = expand(RESULT_DIR + "macs2/{samples}_peaks.narrowPeak", samples = SAMPLES),
COVERAGE_TRACK = expand(RESULT_DIR + "bamCoverage/{samples}.bw", samples = SAMPLES),
BIGWIGSUMMARY = RESULT_DIR + "bigwigsummary/multiBigwigSummary.npz",
PCAPLOT = RESULT_DIR + "PCA/PCA_PLOT.pdf"
rule all:
input:
......@@ -79,8 +82,8 @@ rule all:
COVERAGE_TRACK,
BIGWIGSUMMARY,
PCAPLOT,
#SORTED_INDEXED,
#NARROWPEAK,
SORTED_INDEXED,
NARROWPEAK,
#GENOME_INFO,
#GAPPED_PEAK,
#NAME_LOG
......@@ -204,15 +207,19 @@ rule mapping:
rule sort:
input:
mapped = WORKING_DIR + "mapped/{samples}.bam"
mapped = WORKING_DIR + "mapped/{samples}.bam"
output:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
sorted = WORKING_DIR + "sort/{samples}.sorted.bam",
bai = WORKING_DIR + "sort/{samples}.sorted.bam.bai"
conda:
"envs/samtools_bowtie.yaml"
log:
RESULT_DIR + "logs/sort/{samples}.log"
shell:
"samtools sort {input} -o {output} &>{log}"
"""
samtools sort {input} -o {output.sorted} &>{log}
samtools index {output.sorted}
"""
rule deduplication:
input:
......@@ -229,17 +236,17 @@ rule deduplication:
picard MarkDuplicates -I {input} -O {output.dedup} -M {output.stats} 2>{log}
"""
rule index:
input:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
output:
map_sorted_indexed = WORKING_DIR + "sort/{samples}.sorted.bam.bai"
conda:
"envs/samtools_bowtie.yaml"
log:
RESULT_DIR + "logs/index/{samples}.log"
shell:
"samtools index {input} {output} &>{log}"
# #rule index:
# #input:
# map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
# output:
# map_sorted_indexed = WORKING_DIR + "sort/{samples}.sorted.bam.bai"
# conda:
# "envs/samtools_bowtie.yaml"
# log:
# RESULT_DIR + "logs/index/{samples}.log"
# shell:
# "samtools index {input} {output} &>{log}"
rule get_genome_info:
input:
......@@ -298,39 +305,44 @@ rule bamCoverage:
input:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
output:
coverage_track = WORKING_DIR + "bamCoverage/{samples}.bw"
coverage_track = RESULT_DIR + "bamCoverage/{samples}.bw"
params:
effectiveGenomeSize = str(config['bamCoverage']['effectiveGenomeSize']),
normalization = str(config['bamCoverage']['normalization'])
effectiveGenomeSize = str(config['bamCoverage']['effectiveGenomeSize']),
normalization = str(config['bamCoverage']['normalization']),
binSize = str(config['bamCoverage']['binSize'])
log:
RESULT_DIR + "logs/bamCoverage/{samples}.bw.log"
conda:
"envs/deeptools.yaml"
shell:
"bamCoverage -b {input} --effectiveGenomeSize {params.effectiveGenomeSize} --normalizeUsing {params.normalization} --ignoreDuplicates -o {output}"
"bamCoverage -b {input} --effectiveGenomeSize {params.effectiveGenomeSize} --normalizeUsing {params.normalization} --ignoreDuplicates -o {output} --binSize {params.binSize}"
#bdg: create bedgraph output files
#format=BAMPE: only use properly-paired read alignments
# rule multibigwigSummary need to be expanded to use more than 1 bigwig file
# Right now the file only use 1 file, which is not useful for a comparison
rule multibigwigSummary:
input:
coverage_track = WORKING_DIR + "bamCoverage/{samples}.bw"
lambda wildcards: expand(RESULT_DIR + "bamCoverage/{sample}.bw", sample = SAMPLES)
output:
bigwigsummary = WORKING_DIR + "bigwigsummary/{samples}.npz"
bigwigsummary = RESULT_DIR + "bigwigsummary/multiBigwigSummary.npz"
log:
RESULT_DIR + "logs/bigwigsummary/{samples}.bw.log"
RESULT_DIR + "logs/bigwigsummary/multiBigwigSummary.log"
conda:
"envs/deeptools.yaml"
shell:
"multiBigwigSummary bins -b {input} -o {output}"
"multiBigwigSummary bins -b {input} -o {output} "
#2>{log}
# PlotPCA does not work with a bigwigsummary made of only one sample
rule PCA:
input:
bigwigsummary = WORKING_DIR + "bigwigsummary/{samples}.npz"
bigwigsummary = RESULT_DIR + "bigwigsummary/multiBigwigSummary.npz"
output:
PCAplot = RESULT_DIR + "PCA/{samples}_pca.png"
PCAplot = RESULT_DIR + "PCA/PCA_PLOT.pdf"
log:
RESULT_DIR + "logs/bigwigsummary/{samples}_pca.png.log"
RESULT_DIR + "logs/bigwigsummary/PCA_PLOT.log"
conda:
"envs/deeptools.yaml"
shell:
"plotPCA -in {input} -o {output}"
"plotPCA -in {input} -o {output} 2>{log}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment