Commit 71eab569 authored by WJH58's avatar WJH58
Browse files

trimmomatic added

parent c7001b9f
SyntaxError in line 47 of /Users/flora/Desktop/Snakemake_ATAC_2020/ATAC-seq_pipeline/Snakefile:
invalid syntax
SyntaxError in line 47 of /Users/flora/Desktop/Snakemake_ATAC_2020/ATAC-seq_pipeline/Snakefile:
invalid syntax
SyntaxError in line 47 of /Users/flora/Desktop/Snakemake_ATAC_2020/ATAC-seq_pipeline/Snakefile:
invalid syntax
SyntaxError in line 47 of /Users/flora/Desktop/Snakemake_ATAC_2020/ATAC-seq_pipeline/Snakefile:
invalid syntax
>Illumina_Single_End_Adapter1
GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
>Illumina_Single_End_Adapter2
CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
>Illumina_Single_End_PCR_primer1
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
>Illumina_Single_End_PCR_primer2
CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
>Illumina_Paired_End_Adapter1
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
>Illumina_Paired_End_Adapter2
GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
>Illumina_Paired_End_PCR_Primer1
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
>Illumina_Paired_End_PCR_Primer2
CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
>Illumina_Paired_End_Sequencing_Primer1
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
>Illumina_Paired_End_Sequencing_Primer2
CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
>TruSeq_Universal_Adapter
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
>TruSeqAdapter_Index1
GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index2
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index3
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index4
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index5
GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index6
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index7
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index8
GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG
>TruSeq_Adapter_Index9
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGC
TTG
>TruSeq_Adapter_Index10
GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGC
TTG
>TruSeq_Adapter_Index11
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGC
TTG
>TruSeq_Adapter_Index12
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGC
TTG
>TruSeq_Adapter_Index13
GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAACAATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index14
GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCGTATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index15
GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAGAATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index16
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCCGATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index18
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCACATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index19
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAACGATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index20
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCTTATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index21
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGGAATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index22
GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGTAATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index23
GATCGGAAGAGCACACGTCTGAACTCCAGTCACGAGTGGATATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index25
GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATATCTCGTATGCCGTCTTCT
GCTTG
>TruSeq_Adapter_Index27
GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTTTATCTCGTATGCCGTCTTCT
GCTTG
......@@ -15,5 +15,14 @@ genome_fasta_url : "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/rele
## trimmomatic ##
adapters: "adapters.fasta"
# Settings read by the Snakefile's trimmomatic rule via config['trimmomatic'][...].
# Key spellings (including "palindromeClipTreshold") are looked up verbatim by
# that rule, so they must not be "corrected" here without changing the rule too.
trimmomatic:
# Adapter FASTA file handed to the ILLUMINACLIP step.
adapters: "adapters.fasta"
# ILLUMINACLIP arguments, in the order seedMisMatches:palindromeClipTreshold:simpleClipThreshold.
seedMisMatches: '2'
palindromeClipTreshold: '30'
simpleClipThreshold: '10'
# Value passed to the LEADING step.
LeadMinTrimQual: '3'
# Value passed to the TRAILING step.
TrailMinTrimQual: '3'
# SLIDINGWINDOW arguments, in the order windowSize:avgMinQual.
windowSize: '4'
avgMinQual: '15'
# Value passed to the MINLEN step.
minReadLength: '40'
# Inserted as-is on the trimmomatic command line, so the leading dash is required.
phred: '-phred33'
# Conda environment specification.
# NOTE(review): presumably the "../envs/trimmomatic.yaml" file named in the
# trimmomatic rule's conda directive - confirm the path.
channels:
- conda-forge
- bioconda
- r
- defaults
dependencies:
# Trimmomatic pinned to release 0.38.
- trimmomatic=0.38
#######Configuration files#######
# Pull the values used throughout the workflow out of the Snakemake `config`
# mapping. Keys must be plain-ASCII quoted strings: typographic quotes are a
# syntax error and unquoted keys are undefined Python names.
# The rules build paths by string concatenation (e.g. WORKING_DIR + "reference.fa"),
# so both directory values are expected to end with a path separator.
GENOME_FASTA_URL = config["genome_fasta_url"]
WORKING_DIR = config["working_dir"]
RESULT_DIR = config["results_dir"]
#######Rules###########
# Download the reference genome from GENOME_FASTA_URL and save it as
# WORKING_DIR + "reference.fa".
rule download_genome:
    output:
        WORKING_DIR + "reference.fa"
    # wget needs the capital -O to choose the name of the downloaded file;
    # the lowercase -o only redirects wget's own log messages to that path.
    shell:
        "wget -O {output} {GENOME_FASTA_URL}"
# Run FastQC on the raw forward/reverse reads of one sample.
# fastqc [-o output dir] [--(no)extract] [-f fastq|bam|sam] [-c contaminant file] seqfile1 .. seqfileN
rule fastqc:
    input:
        fwd = WORKING_DIR + "{samples}_fwd.fq",
        rev = WORKING_DIR + "{samples}_rev.fq"
    # NOTE(review): FastQC names its reports <input name>_fastqc.html/.zip, so
    # these declared paths will probably not be created by the command below -
    # confirm the intended output names before relying on this rule.
    output:
        R1 = RESULT_DIR + "fastqc/{samples}_fwd.fq.gz",
        R2 = RESULT_DIR + "fastqc/{samples}_rev.fq.gz"
    # -o takes a single directory; "{output}" would expand to two file paths.
    params:
        outdir = RESULT_DIR + "fastqc/"
    shell:
        "fastqc -o {params.outdir} -t 10 {input}"
# Trim one sample's paired-end reads with Trimmomatic in PE mode.
rule trimmomatic:
    # NOTE(review): these inputs are the paths declared as outputs of the
    # fastqc rule; trimming normally starts from the raw reads - confirm.
    input:
        R1 = RESULT_DIR + "fastqc/{samples}_fwd.fq.gz",
        R2 = RESULT_DIR + "fastqc/{samples}_rev.fq.gz"
    # Trimmomatic PE expects four output files in this exact order:
    # forward paired, forward unpaired, reverse paired, reverse unpaired.
    # The items are not called "forward"/"reverse" because "reverse" collides
    # with a list method name that Snakemake reserves for internal use.
    output:
        forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fq.gz",
        forward_unpaired = temp(WORKING_DIR + "trimmed/{samples}_forward_unpaired.fq.gz"),
        reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fq.gz",
        reverse_unpaired = temp(WORKING_DIR + "trimmed/{samples}_reverse_unpaired.fq.gz")
    # NOTE(review): no trimming steps (ILLUMINACLIP, SLIDINGWINDOW, MINLEN, ...)
    # are given, and the jar version (0.35) differs from the 0.38 pinned in the
    # conda environment - confirm both.
    shell:
        "java -jar trimmomatic-0.35.jar PE -phred33 {input} {output}"
# Run FastQC a second time, on the trimmed reads of one sample.
rule fastqc2:
    # Item names avoid "reverse", which collides with a list method name that
    # Snakemake reserves for internal use.
    input:
        forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fq.gz",
        reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fq.gz"
    # NOTE(review): FastQC names its reports <input name>_fastqc.html/.zip, so
    # these declared paths will probably not be created by the command below -
    # confirm the intended output names.
    output:
        RESULT_DIR + "fastqc/{samples}_forward_posttrim.fq.gz",
        RESULT_DIR + "fastqc/{samples}_reverse_posttrim.fq.gz"
    # -o takes a single directory; "{output}" would expand to two file paths.
    params:
        outdir = RESULT_DIR + "fastqc/"
    shell:
        "fastqc -o {params.outdir} -t 6 {input}"
# Build the Bowtie2 index of the reference genome.
rule index:
    input:
        WORKING_DIR + "reference.fa"
    output:
        WORKING_DIR + "genome.rev.1.bt2",
        WORKING_DIR + "genome.rev.2.bt2"
    message:
        "index reference genome"
    # Index basename handed to bowtie2-build. It has to be "genome" so that the
    # files bowtie2-build writes (<basename>.rev.1.bt2, ...) are the ones
    # declared under output; "genome_index" would produce differently named files.
    params:
        WORKING_DIR + "genome"
    # "--threads" needs two ASCII hyphens (the original had an en dash), and the
    # FASTA path and the basename are separate arguments.
    shell:
        "bowtie2-build --threads 10 {input} {params}"
# Align one sample's trimmed read pairs with Bowtie2 and store the result as BAM.
rule align:
    # The index is declared through the files the index rule lists as outputs,
    # since the bare basename is not a file any rule produces. Item names avoid
    # "reverse", which collides with a list method name reserved by Snakemake.
    input:
        index_files = [WORKING_DIR + "genome.rev.1.bt2", WORKING_DIR + "genome.rev.2.bt2"],
        forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fq.gz",
        reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fq.gz"
    # Basename of the index files above, i.e. their path without ".rev.N.bt2".
    params:
        index = WORKING_DIR + "genome"
    # The wildcard must carry the same name as in the inputs ("samples");
    # otherwise the input paths cannot be derived from the requested output.
    output:
        WORKING_DIR + "mapped/{samples}.bam"
    message:
        "mapping samples to reference genome"
    # bowtie2 takes its thread count with -p (-t only toggles timing output)
    # and the index basename with -x.
    shell:
        "bowtie2 -p 10 -x {params.index} -1 {input.forward_reads} -2 {input.reverse_reads} | samtools view -Sb - > {output}"
......@@ -44,6 +44,8 @@ rule all:
[WORKING_DIR + "genome." + str(i) + ".bt2" for i in range(1,4)],
WORKING_DIR + "genome.rev.1.bt2",
# The comma ending the next line was missing, which made the first newly added
# target a syntax error (reported as "SyntaxError in line 47 ... invalid syntax").
WORKING_DIR + "genome.rev.2.bt2",
# NOTE(review): these two targets still contain the {samples} wildcard; a target
# rule needs concrete file names (e.g. built with expand() over the sample list,
# which is not visible here) - confirm and adjust.
WORKING_DIR + "trimmed/{samples}_forward.fastq.gz",
WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz",
message : "Analysis is complete!"
shell:""
......@@ -71,7 +73,40 @@ rule index_genome:
# Trim one sample's paired-end reads with Trimmomatic (PE mode).
# Reads come from get_fastq, the adapter file and every trimming setting from
# the "trimmomatic" section of the config; Trimmomatic's messages go to the log.
rule trimmomatic:
    input:
        reads = get_fastq,
        adapters = config['trimmomatic']["adapters"]
    # Paired reads are kept; the unpaired leftovers are marked temp().
    output:
        forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fastq.gz",
        reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz",
        forwardUnpaired = temp(WORKING_DIR + "trimmed/{samples}_forward_unpaired.fastq.gz"),
        reverseUnpaired = temp(WORKING_DIR + "trimmed/{samples}_reverse_unpaired.fastq.gz")
    # The log path has to use the same wildcard name as the outputs
    # ("samples", not "sample"); Snakemake rejects a rule whose output and log
    # files do not share the same wildcards.
    log:
        RESULT_DIR + "logs/trimmomatic/{samples}.log"
    # Config keys are looked up with the exact spelling used in the config file.
    params:
        seedMisMatches = str(config['trimmomatic']['seedMisMatches']),
        palindromeClipTreshold = str(config['trimmomatic']['palindromeClipTreshold']),
        simpleClipThreshhold = str(config['trimmomatic']['simpleClipThreshold']),
        LeadMinTrimQual = str(config['trimmomatic']['LeadMinTrimQual']),
        TrailMinTrimQual = str(config['trimmomatic']['TrailMinTrimQual']),
        windowSize = str(config['trimmomatic']['windowSize']),
        avgMinQual = str(config['trimmomatic']['avgMinQual']),
        minReadLen = str(config['trimmomatic']['minReadLength']),
        phred = str(config["trimmomatic"]["phred"])
    threads: 10
    conda:
        "../envs/trimmomatic.yaml"
    # Output order on the command line is forward paired, forward unpaired,
    # reverse paired, reverse unpaired, followed by the trimming steps.
    shell:
        "trimmomatic PE {params.phred} -threads {threads} "
        "{input.reads} "
        "{output.forward_reads} "
        "{output.forwardUnpaired} "
        "{output.reverse_reads} "
        "{output.reverseUnpaired} "
        "ILLUMINACLIP:{input.adapters}:{params.seedMisMatches}:{params.palindromeClipTreshold}:{params.simpleClipThreshhold} "
        "LEADING:{params.LeadMinTrimQual} "
        "TRAILING:{params.TrailMinTrimQual} "
        "SLIDINGWINDOW:{params.windowSize}:{params.avgMinQual} "
        "MINLEN:{params.minReadLen} &>{log}"
rule fastqc:
input:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment