Commit 8269c82d authored by WJH58's avatar WJH58
Browse files

added params under rule peak_calling

parent c0085bb1
Version: 1.2.10
Arguments Used:
-b temp/sort/WT8.sorted.bam
-i temp/index/WT8.sorted.bam.bai
-g genome.info
Version: 1.2.10
Arguments Used:
-b temp/sort/WT8.sorted.bam
-i temp/index/WT8_map.sorted.bam.bai
-g genome.info
--output WT8
--upper 20
--lower 10
......@@ -35,3 +35,7 @@ bowtie2:
max_fragment_len: "--maxins 500" # maximum fragment length for valid paired-end alignments
min_fragment_len: "--minins 80" # minimum fragment length for valid paired-end alignments
verbose: "-q"
# genome.info
hmmratac:
genome.info: "genome.info"
channels:
- bioconda
dependencies:
- HMMRATAC=1.2.10
channels:
- bioconda
dependencies:
- perl-bioperl=1.7
- samtools=1.9
chr1 195471971
chr2 182113224
chr3 160039680
chr4 156508116
chr5 151834684
chr6 149736546
chr7 145441459
chr8 129401213
chr9 124595110
chr10 130694993
chr11 122082543
chr12 120129022
chr13 120421639
chr14 124902244
chr15 104043685
chr16 98207768
chr17 94987271
chr18 90702639
chr19 61431566
chrX 171031299
chrY 91744698
chrM 16299
GL456210.1 169725
GL456211.1 241735
GL456212.1 153618
GL456213.1 39340
GL456216.1 66673
GL456219.1 175968
GL456221.1 206961
GL456233.1 336933
GL456239.1 40056
GL456350.1 227966
GL456354.1 195993
GL456359.1 22974
GL456360.1 31704
GL456366.1 47073
GL456367.1 42057
GL456368.1 20208
GL456370.1 26764
GL456372.1 28664
GL456378.1 31602
GL456379.1 72385
GL456381.1 25871
GL456382.1 23158
GL456383.1 38659
GL456385.1 35240
GL456387.1 24685
GL456389.1 28772
GL456390.1 24668
GL456392.1 23629
GL456393.1 55711
GL456394.1 24323
GL456396.1 21240
JH584292.1 14945
JH584293.1 207968
JH584294.1 191905
JH584295.1 1976
JH584296.1 199368
JH584297.1 205776
JH584298.1 184189
JH584299.1 953012
JH584300.1 182347
JH584301.1 259875
JH584302.1 155838
JH584303.1 158099
JH584304.1 114452
```
rule trimmomatic:
input:
reads = get_fastq,
adapters = config['trimmomatic']["adapters"]
output:
forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fastq.gz",
reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz",
forwardUnpaired = temp(WORKING_DIR + "trimmed/{samples}_forward_unpaired.fastq.gz"),
reverseUnpaired = temp(WORKING_DIR + "trimmed/{samples}_reverse_unpaired.fastq.gz"),
log:
RESULT_DIR + "logs/trimmomatic/{samples}.log"
params:
seedMisMatches = str(config['trimmomatic']['seedMisMatches']),
palindromeClipTreshold = str(config['trimmomatic']['palindromeClipTreshold']),
simpleClipThreshhold = str(config['trimmomatic']['simpleClipThreshold']),
LeadMinTrimQual = str(config['trimmomatic']['LeadMinTrimQual']),
TrailMinTrimQual = str(config['trimmomatic']['TrailMinTrimQual']),
windowSize = str(config['trimmomatic']['windowSize']),
avgMinQual = str(config['trimmomatic']['avgMinQual']),
minReadLen = str(config['trimmomatic']['minReadLength']),
phred = str(config["trimmomatic"]["phred"])
threads: 10
conda:
"envs/trimmomatic.yaml"
shell:
"""
trimmomatic PE \
-threads 10 \
-phred33 \
{input.reads} \
{output.forward_reads} {output.forwardUnpaired} {output.reverse_reads} {output.reverseUnpaired} \
ILLUMINACLIP:{input.adapters}:{params.seedMisMatches}:{params.palindromeClipTreshold}:{params.simpleClipThreshhold} \
LEADING:3 \
TRAILING:3 \
SLIDINGWINDOW:4:15 \
MINLEN:40 &>{log}
"""
```
####Trimmomatic Arguments####
Version: 1.2.10
Arguments Used:
-b temp/sort/WT8.sorted.bam
-i temp/index/WT8_map.sorted.bam.bai
-g genome.info
--output results/peaks/WT8_peaks.gappedPeak
--upper 20
--lower 10
......@@ -50,7 +50,9 @@ TRIMMED_FASTQC = expand(RESULT_DIR + "trimmed_fastqc/WT8_{direction}_fastqc.htm
MAPPED = expand(WORKING_DIR + "mapped/{samples}.bam", samples = SAMPLES),
UNMAPPED = expand([WORKING_DIR + "unmapped/{samples}.fq." + str(i) +".gz" for i in range(1,2)], samples = SAMPLES),
MAP_SORTED = expand(WORKING_DIR + "sort/{samples}.sorted.bam", samples = SAMPLES),
SORTED_INDEXED = expand(WORKING_DIR + "index/{samples}.sorted.bam.bai", samples = SAMPLES)
SORTED_INDEXED = expand(WORKING_DIR + "index/{samples}.sorted.bam.bai", samples = SAMPLES),
GENOME_INFO = expand(WORKING_DIR + "genome_info/{samples}.genome.info", samples = SAMPLES),
GAPPED_PEAK = expand(RESULT_DIR + "peaks/{samples}_peaks.gappedPeak", samples = SAMPLES)
rule all:
input:
......@@ -65,7 +67,9 @@ rule all:
MAPPED,
UNMAPPED,
MAP_SORTED,
SORTED_INDEXED
SORTED_INDEXED,
GENOME_INFO,
GAPPED_PEAK
message : "Analysis is complete!"
shell:""
......@@ -180,29 +184,64 @@ rule mapping:
RESULT_DIR + "logs/bowtie/{samples}.log"
shell:
"""
bowtie2 {params.bowtie} --threads {threads} -x {params.index} -1 {input.forward_reads} -2 {input.reverse_reads} -U {input.forwardUnpaired},{input.reverseUnpaired} --un-conc-gz {params.unmapped} | samtools view -Sb - > {output.mapped} 2>{log}
bowtie2 {params.bowtie} --threads {threads} -x {params.index} -1 {input.forward_reads} -2 {input.reverse_reads} -U {input.forwardUnpaired},{input.reverseUnpaired} --un-conc-gz {params.unmapped} | samtools view -Sb - > {output.mapped} 2>&{log}
"""
rule sort:
input:
mapped = WORKING_DIR + "mapped/{samples}.bam"
output:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
map_sorted = WORKING_DIR + "sort/{samples}_map.sorted.bam"
conda:
"envs/samtools_bowtie.yaml"
log:
RESULT_DIR + "logs/sort/{samples}.log"
shell:
"samtools sort {input} -o {output}"
"samtools sort {input} -o {output} &>{log}"
rule index:
input:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
map_sorted = WORKING_DIR + "sort/{samples}_map.sorted.bam"
output:
sorted_indexed = WORKING_DIR + "index/{samples}.sorted.bam.bai"
map_sorted_indexed = WORKING_DIR + "index/{samples}_map.sorted.bam.bai"
conda:
"envs/samtools_bowtie.yaml"
log:
RESULT_DIR + "logs/index/{samples}.log"
shell:
"samtools index {input} {output}"
"samtools index {input} {output} &>{log}"
rule get_genome_info:
input:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam"
output:
genome_info = WORKING_DIR + "genome_info/{samples}.genome.info"
conda:
"envs/perl.yaml"
shell:
"""
samtools view -H {input} | grep SQ | cut -f 2-3 | cut -d ':' -f 2 | cut -f 1 > tmp
samtools view -H {input} | grep SQ | cut -f 2-3 | cut -d ':' -f 2,3 | cut -d ':' -f 2 > tmp2
paste tmp tmp2 > {output}
rm tmp tmp2
"""
rule peak_calling:
input:
map_sorted = WORKING_DIR + "sort/{samples}.sorted.bam",
map_sorted_indexed = WORKING_DIR + "index/{samples}_map.sorted.bam.bai"
output:
gapped_peak = RESULT_DIR + "peaks/{samples}_peaks.gappedPeak"
params:
genome_info = str(config['hmmratac']['genome.info']),
upper = 20,
lower = 10,
output_preflix = "{samples}"
log:
RESULT_DIR + "logs/peak_calling/{samples}.log"
conda:
"envs/hmmratac.yaml"
shell:
"""
HMMRATAC -b {input.map_sorted} -i {input.map_sorted_indexed} -g {params.genome_info} --output {params.output_preflix} --upper {params.upper} --lower {params.lower} 2&>{log}
"""
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment