Commit 9b0a5341 authored by Sam Nooij

Add stats summary rules for deduplicated reads

parent d9795808
@@ -108,18 +108,10 @@ rule all:
sample = SAMPLES, ref = REFERENCE_NAMES),
#Paired reads mapped to reference
expand("data/tmp/Deduplicated-metrics_{sample}_to_{ref}-paired.txt",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Paired deduplicated mapped reads
expand("data/tmp/Mapped-only_{sample}_to_{ref}-unpaired.bam",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Unpaired reads mapped to reference
expand("data/tmp/Deduplicated-metrics_{sample}_to_{ref}-unpaired.txt",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Unpaired deduplicated mapped reads
expand("data/tmp/counts/Mapped_read_counts-{sample}_to_{ref}-paired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Counts of mapped paired reads
@@ -146,6 +138,41 @@ rule all:
"data/processed/Depth_of_coverage.tsv",
#Concatenated tables with depth of coverage
# And the _deduplicated_ mapped files:
expand("data/tmp/Deduplicated-mapped_{sample}_to_{ref}-paired.bam",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Paired deduplicated mapped reads
expand("data/tmp/Deduplicated-mapped_{sample}_to_{ref}-unpaired.bam",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Unpaired deduplicated mapped reads
expand("data/tmp/counts/Deduplicated_read_counts-{sample}_to_{ref}-paired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Counts of deduplicated mapped paired reads
expand("data/tmp/counts/Deduplicated_read_counts-{sample}_to_{ref}-unpaired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Counts of deduplicated mapped unpaired reads
expand("data/tmp/depth/Deduplicated_depth_of_coverage-{sample}_to_{ref}-paired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Depth of coverage per position for deduplicated paired reads
expand("data/tmp/depth/Deduplicated_depth_of_coverage-{sample}_to_{ref}-unpaired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES),
#Depth of coverage per position for deduplicated unpaired reads
"data/processed/Deduplicated_read_counts-paired.tsv",
"data/processed/Deduplicated_read_counts-unpaired.tsv",
"data/processed/Deduplicated_read_counts.tsv",
#Concatenated tables with number of deduplicated reads per sequence per sample
"data/processed/Deduplicated_depth_of_coverage-paired.tsv",
"data/processed/Deduplicated_depth_of_coverage-unpaired.tsv",
"data/processed/Deduplicated_depth_of_coverage.tsv",
#Concatenated tables with deduplicated depth of coverage
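(For readers unfamiliar with Snakemake: expand() takes the cross product of all wildcard values, so each pattern above yields one target per sample/reference combination. A minimal illustration with made-up sample and reference names standing in for SAMPLES and REFERENCE_NAMES:

from snakemake.io import expand

# Hypothetical values for illustration only
SAMPLES = ["sampleA", "sampleB"]
REFERENCE_NAMES = ["ref1"]

targets = expand(
    "data/tmp/Deduplicated-mapped_{sample}_to_{ref}-paired.bam",
    sample=SAMPLES, ref=REFERENCE_NAMES)
# -> ['data/tmp/Deduplicated-mapped_sampleA_to_ref1-paired.bam',
#     'data/tmp/Deduplicated-mapped_sampleB_to_ref1-paired.bam']
)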
### Step 3: include processing steps to generate output ###
@@ -468,7 +495,6 @@ rule deduplicate_paired_reads:
picard MarkDuplicates --REMOVE_DUPLICATES -I {input} -O {output.bam} -M {output.metrics}
"""
rule map_unpaired_trimmed_reads:
input:
unpaired="data/raw/trimmed_reads/{sample}_unpaired.fq",
@@ -549,6 +575,23 @@ rule count_mapped_paired_reads:
bash bin/count_mapped_reads.sh {input} > {output} 2> {log}
"""
rule count_deduplicated_paired_reads:
input:
"data/tmp/Deduplicated-mapped_{sample}_to_{ref}-paired.bam"
output:
"data/tmp/counts/Deduplicated_read_counts-{sample}_to_{ref}-paired.tsv"
conda:
"envs/bwa-samtools.yaml"
threads: 1
log:
"log/count_deduplicated_paired_reads-{sample}_to_{ref}.txt"
benchmark:
"log/benchmark/count_deduplicated_paired_reads-{sample}_to_{ref}.txt"
shell:
"""
bash bin/count_mapped_reads.sh {input} > {output} 2> {log}
"""
rule concatenate_mapped_paired_reads:
input:
expand("data/tmp/counts/Mapped_read_counts-{sample}_to_{ref}-paired.tsv",
@@ -567,6 +610,24 @@ rule concatenate_mapped_paired_reads:
python bin/concatenate_read_counts.py -i {input} -o {output} > {log} 2>&1
"""
rule concatenate_deduplicated_paired_reads:
input:
expand("data/tmp/counts/Deduplicated_read_counts-{sample}_to_{ref}-paired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES)
output:
"data/processed/Deduplicated_read_counts-paired.tsv"
conda:
"envs/pandas.yaml"
threads: 1
log:
"log/concatenate_deduplicated_paired_reads.txt"
benchmark:
"log/benchmark/concatenate_deduplicated_paired_reads.txt"
shell:
"""
python bin/concatenate_read_counts.py -i {input} -o {output} > {log} 2>&1
"""
rule count_mapped_unpaired_reads:
input:
"data/tmp/Mapped-only_{sample}_to_{ref}-unpaired.bam",
@@ -584,6 +645,23 @@ rule count_mapped_unpaired_reads:
bash bin/count_mapped_reads.sh {input} > {output} 2> {log}
"""
rule count_deduplicated_unpaired_reads:
input:
"data/tmp/Deduplicated-mapped_{sample}_to_{ref}-unpaired.bam"
output:
"data/tmp/counts/Deduplicated_read_counts-{sample}_to_{ref}-unpaired.tsv"
conda:
"envs/bwa-samtools.yaml"
threads: 1
log:
"log/count_deduplicated_unpaired_reads-{sample}_to_{ref}.txt"
benchmark:
"log/benchmark/count_deduplicated_unpaired_reads-{sample}_to_{ref}.txt"
shell:
"""
bash bin/count_mapped_reads.sh {input} > {output} 2> {log}
"""
rule concatenate_mapped_unpaired_reads:
input:
expand("data/tmp/counts/Mapped_read_counts-{sample}_to_{ref}-unpaired.tsv",
@@ -602,6 +680,24 @@ rule concatenate_mapped_unpaired_reads:
python bin/concatenate_read_counts.py -i {input} -o {output} > {log} 2>&1
"""
rule concatenate_deduplicated_unpaired_reads:
input:
expand("data/tmp/counts/Deduplicated_read_counts-{sample}_to_{ref}-unpaired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES)
output:
"data/processed/Deduplicated_read_counts-unpaired.tsv"
conda:
"envs/pandas.yaml"
threads: 1
log:
"log/concatenate_deduplicated_unpaired_reads.txt"
benchmark:
"log/benchmark/concatenate_deduplicated_unpaired_reads.txt"
shell:
"""
python bin/concatenate_read_counts.py -i {input} -o {output} > {log} 2>&1
"""
rule sum_mapped_reads:
input:
paired="data/processed/Mapped_read_counts-paired.tsv",
@@ -618,6 +714,22 @@ rule sum_mapped_reads:
script:
"bin/sum_mapped_reads.py"
rule sum_deduplicated_reads:
input:
paired="data/processed/Deduplicated_read_counts-paired.tsv",
unpaired="data/processed/Deduplicated_read_counts-unpaired.tsv"
output:
"data/processed/Deduplicated_read_counts.tsv"
conda:
"envs/pandas.yaml"
threads: 1
log:
"log/sum_deduplicated_reads.txt"
benchmark:
"log/benchmark/sum_deduplicated_reads.txt"
script:
"bin/sum_mapped_reads.py"
rule calculate_depth_paired_reads:
input:
"data/tmp/Mapped-only_{sample}_to_{ref}-paired.bam",
@@ -635,6 +747,23 @@ rule calculate_depth_paired_reads:
bash bin/calculate_depth_per_position.sh {input} > {output} 2> {log}
"""
rule calculate_depth_deduplicated_paired_reads:
input:
"data/tmp/Deduplicated-mapped_{sample}_to_{ref}-paired.bam"
output:
"data/tmp/depth/Deduplicated_depth_of_coverage-{sample}_to_{ref}-paired.tsv"
conda:
"envs/bwa-samtools.yaml"
threads: 1
log:
"log/calculate_depth_deduplicated_paired_reads-{sample}_to_{ref}.txt"
benchmark:
"log/benchmark/calculate_depth_deduplicated_paired_reads-{sample}_to_{ref}.txt"
shell:
"""
bash bin/calculate_depth_per_position.sh {input} > {output} 2> {log}
"""
rule concatenate_depth_paired_reads:
input:
expand("data/tmp/depth/Depth_of_coverage-{sample}_to_{ref}-paired.tsv",
@@ -653,6 +782,24 @@ rule concatenate_depth_paired_reads:
python bin/concatenate_depth_tables.py -i {input} -o {output} > {log} 2>&1
"""
rule concatenate_depth_deduplicated_paired_reads:
input:
expand("data/tmp/depth/Deduplicated_depth_of_coverage-{sample}_to_{ref}-paired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES)
output:
"data/processed/Deduplicated_depth_of_coverage-paired.tsv"
conda:
"envs/pandas.yaml"
threads: 1
log:
"log/concatenate_depth_deduplicated_paired_reads.txt"
benchmark:
"log/benchmark/concatenate_depth_deduplicated_paired_reads.txt"
shell:
"""
python bin/concatenate_depth_tables.py -i {input} -o {output} > {log} 2>&1
"""
rule calculate_depth_unpaired_reads:
input:
"data/tmp/Mapped-only_{sample}_to_{ref}-unpaired.bam",
@@ -670,6 +817,23 @@ rule calculate_depth_unpaired_reads:
bash bin/calculate_depth_per_position.sh {input} > {output} 2> {log}
"""
rule calculate_depth_deduplicated_unpaired_reads:
input:
"data/tmp/Deduplicated-mapped_{sample}_to_{ref}-unpaired.bam"
output:
"data/tmp/depth/Deduplicated_depth_of_coverage-{sample}_to_{ref}-unpaired.tsv"
conda:
"envs/bwa-samtools.yaml"
threads: 1
log:
"log/calculate_depth_deduplicated_unpaired_reads-{sample}_to_{ref}.txt"
benchmark:
"log/benchmark/calculate_depth_deduplicated_unpaired_reads-{sample}_to_{ref}.txt"
shell:
"""
bash bin/calculate_depth_per_position.sh {input} > {output} 2> {log}
"""
rule concatenate_depth_unpaired_reads:
input:
expand("data/tmp/depth/Depth_of_coverage-{sample}_to_{ref}-unpaired.tsv",
@@ -688,6 +852,24 @@ rule concatenate_depth_unpaired_reads:
python bin/concatenate_depth_tables.py -i {input} -o {output} > {log} 2>&1
"""
rule concatenate_depth_deduplicated_unpaired_reads:
input:
expand("data/tmp/depth/Deduplicated_depth_of_coverage-{sample}_to_{ref}-unpaired.tsv",
sample = SAMPLES, ref = REFERENCE_NAMES)
output:
"data/processed/Deduplicated_depth_of_coverage-unpaired.tsv"
conda:
"envs/pandas.yaml"
threads: 1
log:
"log/concatenate_depth_deduplicated_unpaired_reads.txt"
benchmark:
"log/benchmark/concatenate_depth_deduplicated_unpaired_reads.txt"
shell:
"""
python bin/concatenate_depth_tables.py -i {input} -o {output} > {log} 2>&1
"""
rule sum_depths:
input:
paired="data/processed/Depth_of_coverage-paired.tsv",
@@ -703,6 +885,21 @@ rule sum_depths:
script:
"bin/sum_depths.py"
rule sum_deduplicated_depths:
input:
paired="data/processed/Deduplicated_depth_of_coverage-paired.tsv",
unpaired="data/processed/Deduplicated_depth_of_coverage-unpaired.tsv"
output:
"data/processed/Deduplicated_depth_of_coverage.tsv"
conda:
"envs/pandas.yaml"
log:
"log/sum_deduplicated_depths.txt"
benchmark:
"log/benchmark/sum_deduplicated_depths.txt"
script:
"bin/sum_depths.py"
####
## Finally, when all is done remove unnecessary (big) files ##