Commit 60251e93 authored by WJH58's avatar WJH58
Browse files

Working pipeline until trimmomatic

parent 71eab569
.DS_Store .DS_Store
ATAC-seq_pipeline/temp/* ATAC-seq_pipeline/temp/*
ATAC-seq_pipeline/.snakemake/*
...@@ -6,7 +6,7 @@ results_dir: "results/" ...@@ -6,7 +6,7 @@ results_dir: "results/"
data_dir: "../data/" data_dir: "../data/"
sample : "units.tsv" units : "units.tsv"
## genome reference ## ## genome reference ##
......
...@@ -2,4 +2,3 @@ channels: ...@@ -2,4 +2,3 @@ channels:
- bioconda - bioconda
dependencies: dependencies:
- bowtie2=2.4.2 - bowtie2=2.4.2
- samtools=1.11
...@@ -20,8 +20,7 @@ GENOME_FASTA_URL = config["genome_fasta_url"] ...@@ -20,8 +20,7 @@ GENOME_FASTA_URL = config["genome_fasta_url"]
#units = pd.read_table(config["units"], dtype=str).set_index(["bed"], drop=False) #units = pd.read_table(config["units"], dtype=str).set_index(["bed"], drop=False)
#BED = units.index.get_level_values('bed').unique().tolist() #BED = units.index.get_level_values('bed').unique().tolist()
units = pd.read_table(config["units"], dtype=str).set_index(["sample"], drop=False)
units = pd.read_table(config["sample"], dtype=str).set_index(["sample"], drop=False)
SAMPLES = units.index.get_level_values('sample').unique().tolist() SAMPLES = units.index.get_level_values('sample').unique().tolist()
...@@ -29,13 +28,24 @@ SAMPLES = units.index.get_level_values('sample').unique().tolist() ...@@ -29,13 +28,24 @@ SAMPLES = units.index.get_level_values('sample').unique().tolist()
# Helper Functions # Helper Functions
############### ###############
def get_fastq(wildcards): def get_fastq(wildcards):
return units.loc[(sample), ["fq1", "fq2"]].dropna() return units.loc[(wildcards.samples), ["fq1", "fq2"]].dropna()
################## DESIRED OUTPUT ##################
FASTQC = expand(RESULT_DIR + "fastqc/{samples}_{R}_2000reads_fastqc.html", samples = SAMPLES, R=['R1', 'R2']) ##############
# Wildcards
##############
wildcard_constraints:
sample = "[A-Za-z0-9]+"
wildcard_constraints:
unit = "L[0-9]+"
################## DESIRED OUTPUT ##################
FASTQC = expand(RESULT_DIR + "fastqc/{samples}_{R}_2000reads_fastqc.html", samples = SAMPLES, R=['R1', 'R2']),
FORWARD_READS = expand(WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", samples = SAMPLES),
REVERSE_READS = expand(WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", samples = SAMPLES),
rule all: rule all:
input: input:
...@@ -43,9 +53,9 @@ rule all: ...@@ -43,9 +53,9 @@ rule all:
WORKING_DIR + "reference", WORKING_DIR + "reference",
[WORKING_DIR + "genome." + str(i) + ".bt2" for i in range(1,4)], [WORKING_DIR + "genome." + str(i) + ".bt2" for i in range(1,4)],
WORKING_DIR + "genome.rev.1.bt2", WORKING_DIR + "genome.rev.1.bt2",
WORKING_DIR + "genome.rev.2.bt2" WORKING_DIR + "genome.rev.2.bt2",
WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", FORWARD_READS,
WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", REVERSE_READS
message : "Analysis is complete!" message : "Analysis is complete!"
shell:"" shell:""
...@@ -79,9 +89,9 @@ rule trimmomatic: ...@@ -79,9 +89,9 @@ rule trimmomatic:
forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fastq.gz", forward_reads = WORKING_DIR + "trimmed/{samples}_forward.fastq.gz",
reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz", reverse_reads = WORKING_DIR + "trimmed/{samples}_reverse.fastq.gz",
forwardUnpaired = temp(WORKING_DIR + "trimmed/{samples}_forward_unpaired.fastq.gz"), forwardUnpaired = temp(WORKING_DIR + "trimmed/{samples}_forward_unpaired.fastq.gz"),
reverseUnpaired = temp(WORKING_DIR + "trimmed/{samples}_reverse_unpaired.fastq.gz") reverseUnpaired = temp(WORKING_DIR + "trimmed/{samples}_reverse_unpaired.fastq.gz"),
log: log:
RESULT_DIR + "logs/trimmomatic/{sample}.log" RESULT_DIR + "logs/trimmomatic/{samples}.log"
params: params:
seedMisMatches = str(config['trimmomatic']['seedMisMatches']), seedMisMatches = str(config['trimmomatic']['seedMisMatches']),
palindromeClipTreshold = str(config['trimmomatic']['palindromeClipTreshold']), palindromeClipTreshold = str(config['trimmomatic']['palindromeClipTreshold']),
...@@ -94,19 +104,20 @@ rule trimmomatic: ...@@ -94,19 +104,20 @@ rule trimmomatic:
phred = str(config["trimmomatic"]["phred"]) phred = str(config["trimmomatic"]["phred"])
threads: 10 threads: 10
conda: conda:
"../envs/trimmomatic.yaml" "envs/trimmomatic.yaml"
shell: shell:
"trimmomatic PE {params.phred} -threads {threads} " """
"{input.reads} " java -jar trimmomatic-0.32.jar PE \
"{output.forward_reads} " -threads 10 \
"{output.forwardUnpaired} " -phred33 \
"{output.reverse_reads} " {input.reads} \
"{output.reverseUnpaired} " {output.forward_reads} {output.forwardUnpaired} {output.reverse_reads} {output.reverseUnpaired} \
"ILLUMINACLIP:{input.adapters}:{params.seedMisMatches}:{params.palindromeClipTreshold}:{params.simpleClipThreshhold} " ILLUMINACLIP:{input.adapters}:{params.seedMisMatches}:{params.palindromeClipTreshold}:{params.simpleClipThreshhold} \
"LEADING:{params.LeadMinTrimQual} " LEADING:3 \
"TRAILING:{params.TrailMinTrimQual} " TRAILING:3 \
"SLIDINGWINDOW:{params.windowSize}:{params.avgMinQual} " SLIDINGWINDOW:4:15 \
"MINLEN:{params.minReadLen} &>{log}" MINLEN:40 &>{log}
"""
rule fastqc: rule fastqc:
input: input:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment