Commit 54d94a31 authored by WJH58's avatar WJH58
Browse files

units.tsv and snakefile adapter for real data

parent 8269c82d
...@@ -43,16 +43,17 @@ wildcard_constraints: ...@@ -43,16 +43,17 @@ wildcard_constraints:
################## DESIRED OUTPUT ################## ################## DESIRED OUTPUT ##################
FASTQC = expand(RESULT_DIR + "fastqc/{samples}_{R}_2000reads_fastqc.html", samples = SAMPLES, R=['R1', 'R2']), FASTQC = expand(RESULT_DIR + "fastqc/{samples}_{R}_fastqc.html", samples = SAMPLES, R=['R1', 'R2']),
FORWARD_READS = expand(WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", samples = SAMPLES), FORWARD_READS = expand(WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", samples = SAMPLES),
REVERSE_READS = expand(WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", samples = SAMPLES), REVERSE_READS = expand(WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", samples = SAMPLES),
TRIMMED_FASTQC = expand(RESULT_DIR + "trimmed_fastqc/WT8_{direction}_fastqc.html", direction=['forward', 'reverse']), TRIMMED_FASTQC = expand(RESULT_DIR + "trimmed_fastqc/{samples}_{direction}_fastqc.html", samples = SAMPLES, direction=['forward', 'reverse']),
MAPPED = expand(WORKING_DIR + "mapped/{samples}.bam", samples = SAMPLES), MAPPED = expand(WORKING_DIR + "mapped/{samples}.bam", samples = SAMPLES),
UNMAPPED = expand([WORKING_DIR + "unmapped/{samples}.fq." + str(i) +".gz" for i in range(1,2)], samples = SAMPLES), UNMAPPED = expand([WORKING_DIR + "unmapped/{samples}.fq." + str(i) +".gz" for i in range(1,2)], samples = SAMPLES),
MAP_SORTED = expand(WORKING_DIR + "sort/{samples}.sorted.bam", samples = SAMPLES), MAP_SORTED = expand(WORKING_DIR + "sort/{samples}.sorted.bam", samples = SAMPLES),
SORTED_INDEXED = expand(WORKING_DIR + "index/{samples}.sorted.bam.bai", samples = SAMPLES), SORTED_INDEXED = expand(WORKING_DIR + "index/{samples}.sorted.bam.bai", samples = SAMPLES),
GENOME_INFO = expand(WORKING_DIR + "genome_info/{samples}.genome.info", samples = SAMPLES), GENOME_INFO = expand(WORKING_DIR + "genome_info/{samples}.genome.info", samples = SAMPLES),
GAPPED_PEAK = expand(RESULT_DIR + "peaks/{samples}_peaks.gappedPeak", samples = SAMPLES) GAPPED_PEAK = expand(RESULT_DIR + "peaks/{samples}_peaks.gappedPeak", samples = SAMPLES),
NAME_LOG = expand(RESULT_DIR + "peaks/{samples}.log", samples = SAMPLES)
rule all: rule all:
input: input:
...@@ -68,8 +69,10 @@ rule all: ...@@ -68,8 +69,10 @@ rule all:
UNMAPPED, UNMAPPED,
MAP_SORTED, MAP_SORTED,
SORTED_INDEXED, SORTED_INDEXED,
GENOME_INFO, #GENOME_INFO,
GAPPED_PEAK #GAPPED_PEAK,
#NAME_LOG
message : "Analysis is complete!" message : "Analysis is complete!"
shell:"" shell:""
...@@ -135,25 +138,25 @@ rule trimmomatic: ...@@ -135,25 +138,25 @@ rule trimmomatic:
rule trimmed_fastqc: rule trimmed_fastqc:
input: input:
WT8forward = WORKING_DIR + "trimmed/WT8_forward.fastq.gz", forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fastq.gz",
WT8reverse = WORKING_DIR + "trimmed/WT8_reverse.fastq.gz" reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz"
output: output:
RESULT_DIR + "trimmed_fastqc/WT8_{direction}_fastqc.html" RESULT_DIR + "trimmed_fastqc/{samples}_{direction}_fastqc.html"
params: params:
RESULT_DIR + "trimmed_fastqc/" RESULT_DIR + "trimmed_fastqc/"
conda: conda:
"envs/fastqc.yaml" "envs/fastqc.yaml"
log: log:
RESULT_DIR + "logs/trimmed_fastqc/WT8_{direction}.fastqc.log" RESULT_DIR + "logs/trimmed_fastqc/{samples}_{direction}.fastqc.log"
shell: shell:
"fastqc --outdir={params} {input.WT8forward} {input.WT8reverse} &>{log}" "fastqc --outdir={params} {input.forward_reads} {input.reverse_reads} &>{log}"
rule fastqc: rule fastqc:
input: input:
fwd = expand(DATA_DIR + "{samples}_R1_2000reads.fq.gz", samples = SAMPLES), fwd = expand(DATA_DIR + "sample_{numbers}_R1.fastq.gz", numbers = ['8','12','4_3','4_1']),
rev = expand(DATA_DIR + "{samples}_R2_2000reads.fq.gz", samples = SAMPLES) rev = expand(DATA_DIR + "sample_{numbers}_R2.fastq.gz", numbers = ['8','12','4_3','4_1'])
output: output:
expand(RESULT_DIR + "fastqc/{samples}_{R}_2000reads_fastqc.html", samples = SAMPLES, R=['R1', 'R2']) expand(RESULT_DIR + "fastqc/{samples}_{R}_fastqc.html", samples = SAMPLES, R=['R1', 'R2'])
log: log:
expand(RESULT_DIR + "logs/fastqc/{samples}.fastqc.log", samples = SAMPLES) expand(RESULT_DIR + "logs/fastqc/{samples}.fastqc.log", samples = SAMPLES)
params: params:
...@@ -184,14 +187,14 @@ rule mapping: ...@@ -184,14 +187,14 @@ rule mapping:
RESULT_DIR + "logs/bowtie/{samples}.log" RESULT_DIR + "logs/bowtie/{samples}.log"
shell: shell:
""" """
bowtie2 {params.bowtie} --threads {threads} -x {params.index} -1 {input.forward_reads} -2 {input.reverse_reads} -U {input.forwardUnpaired},{input.reverseUnpaired} --un-conc-gz {params.unmapped} | samtools view -Sb - > {output.mapped} 2>&{log} bowtie2 {params.bowtie} --threads {threads} -x {params.index} -1 {input.forward_reads} -2 {input.reverse_reads} -U {input.forwardUnpaired},{input.reverseUnpaired} --un-conc-gz {params.unmapped} | samtools view -Sb - > {output.mapped} >&{log}
""" """
rule sort: rule sort:
input: input:
mapped = WORKING_DIR + "mapped/{samples}.bam" mapped = WORKING_DIR + "mapped/{samples}.bam"
output: output:
map_sorted = WORKING_DIR + "sort/{samples}_map.sorted.bam" map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
conda: conda:
"envs/samtools_bowtie.yaml" "envs/samtools_bowtie.yaml"
log: log:
...@@ -201,9 +204,9 @@ rule sort: ...@@ -201,9 +204,9 @@ rule sort:
rule index: rule index:
input: input:
map_sorted = WORKING_DIR + "sort/{samples}_map.sorted.bam" map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
output: output:
map_sorted_indexed = WORKING_DIR + "index/{samples}_map.sorted.bam.bai" map_sorted_indexed = WORKING_DIR + "index/{samples}.sorted.bam.bai"
conda: conda:
"envs/samtools_bowtie.yaml" "envs/samtools_bowtie.yaml"
log: log:
...@@ -229,13 +232,12 @@ rule get_genome_info: ...@@ -229,13 +232,12 @@ rule get_genome_info:
rule peak_calling: rule peak_calling:
input: input:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam", map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam",
map_sorted_indexed = WORKING_DIR + "index/{samples}_map.sorted.bam.bai" map_sorted_indexed = WORKING_DIR + "index/{samples}.sorted.bam.bai"
output: output:
gapped_peak = RESULT_DIR + "peaks/{samples}_peaks.gappedPeak" gapped_peak = RESULT_DIR + "peaks/{samples}_peaks.gappedPeak",
Name_log = RESULT_DIR + "peaks/{samples}.log"
params: params:
genome_info = str(config['hmmratac']['genome.info']), genome_info = str(config['hmmratac']['genome.info']),
upper = 20,
lower = 10,
output_preflix = "{samples}" output_preflix = "{samples}"
log: log:
RESULT_DIR + "logs/peak_calling/{samples}.log" RESULT_DIR + "logs/peak_calling/{samples}.log"
...@@ -243,5 +245,5 @@ rule peak_calling: ...@@ -243,5 +245,5 @@ rule peak_calling:
"envs/hmmratac.yaml" "envs/hmmratac.yaml"
shell: shell:
""" """
HMMRATAC -b {input.map_sorted} -i {input.map_sorted_indexed} -g {params.genome_info} --output {params.output_preflix} --upper {params.upper} --lower {params.lower} 2&>{log} HMMRATAC -b {input.map_sorted} -i {input.map_sorted_indexed} -g {params.genome_info} -o {params.output_preflix}
""" """
sample fq1 fq2 sample fq1 fq2
WT8 ../data/WT8_R1_2000reads.fq.gz ../data/WT8_R2_2000reads.fq.gz WT1 ../data/sample_4_1_R1.fastq.gz ../data/sample_4_1_R2.fastq.gz
KO12 ../data/KO12_R1_10reads.fq.gz ../data/KO12_R2_10reads.fq.gz WT2 ../data/sample_8_R1.fastq.gz ../data/sample_8_R2.fastq.gz
KO1 ../data/sample_4_3_R1.fastq.gz ../data/sample_4_3_R2.fastq.gz
KO2 ../data/sample_12_R1.fastq.gz ../data/sample_12_R2.fastq.gz
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment