Commit 192cd842 authored by bow's avatar bow
Browse files

Update ITD module to also analyze KMT2A

parent c0a5dbac
......@@ -46,6 +46,21 @@ settings:
#flt3_start:
#flt3_end:
# Path to the FASTA file containing the KMT2A transcript sequence.
# This may contain more than one transcript (up to the entire transcriptome).
#kmt2a_fasta:
# Path to a BWA index containing the KMT2A transcript sequence.
# This must be generated from the `kmt2a_fasta` file.
#kmt2a_bwa_index:
# Name of the KMT2A transcript in `kmt2a_fasta`.
#kmt2a_name:
# Start and end coordinates (1-based, fully closed) of the region in the KMT2A transcript for ITD detection.
# kmt2a_start: 456
# kmt2a_end: 4719
# Path to the `fidus` executable used for detection of FLT3 and KMT2A ITDs.
#fidus_exe:
......
......@@ -3,23 +3,23 @@ from rattle import Run
RUN = Run(config)
include: "includes/qc-seq/Snakefile"
include: "includes/itd-flt3/Snakefile"
include: "includes/snv-indels/Snakefile"
include: "includes/fusion/Snakefile"
include: "includes/expression/Snakefile"
include: "includes/itd/Snakefile"
OUTPUTS = dict(
# Merged FASTQs
fqs="{sample}/{sample}-{pair}.fq.gz",
fqs_stats="{sample}/qc-seq/{sample}-seq-stats.json",
flt3_bam="{sample}/itd-flt3/{sample}.flt3.bam",
flt3_csv="{sample}/itd-flt3/{sample}.flt3.csv",
flt3_bg_csv="{sample}/itd-flt3/{sample}.bg.csv",
flt3_png="{sample}/itd-flt3/{sample}.flt3.png",
# Small variants
smallvars_bam="{sample}/snv-indels/{sample}.snv-indel.bam",
smallvars_vcf="{sample}/snv-indels/{sample}.annotated.vcf.gz",
smallvars_csv_all="{sample}/snv-indels/{sample}.variants_all.csv",
smallvars_csv_hi="{sample}/snv-indels/{sample}.variants_hi.csv",
smallvars_plots="{sample}/snv-indels/variant_plots/.done",
# Fusion
star_fusion_txt="{sample}/fusion/{sample}.star-fusion",
fusioncatcher_txt="{sample}/fusion/{sample}.fusioncatcher",
star_fusion_svg="{sample}/fusion/{sample}.star-fusion.svg",
......@@ -28,14 +28,29 @@ OUTPUTS = dict(
isect_svg="{sample}/fusion/{sample}.sf-isect.svg",
isect_txt="{sample}/fusion/{sample}.sf-isect",
fusions_svg="{sample}/fusion/{sample}.fusions-combined.svg",
# Expression
count_fragments_per_gene="{sample}/expression/{sample}.fragments_per_gene",
count_bases_per_gene="{sample}/expression/{sample}.bases_per_gene",
count_bases_per_exon="{sample}/expression/{sample}.bases_per_exon",
ratio_exons="{sample}/expression/{sample}.exon_ratios",
# Stats
fqs_stats="{sample}/qc-seq/{sample}-seq-stats.json",
rna_stats="{sample}/snv-indels/{sample}.rna_stats",
aln_stats="{sample}/snv-indels/{sample}.aln_stats",
insert_stats="{sample}/snv-indels/{sample}.insert_stats",
)
# ITD module
flt3_bam="{sample}/itd/{sample}.flt3.bam",
flt3_csv="{sample}/itd/{sample}.flt3.csv",
flt3_bg_csv="{sample}/itd/{sample}.flt3.bg.csv",
flt3_png="{sample}/itd/{sample}.flt3.png",
kmt2a_bam="{sample}/itd/{sample}.kmt2a.bam",
kmt2a_csv="{sample}/itd/{sample}.kmt2a.csv",
kmt2a_bg_csv="{sample}/itd/{sample}.kmt2a.bg.csv",
kmt2a_png="{sample}/itd/{sample}.kmt2a.png",
)
rule all:
......
include: srcdir("Snakefile.flt3")
include: srcdir("Snakefile.kmt2a")
......@@ -14,8 +14,8 @@ RUN.set_default_setting("flt3_end", 2024)
rule all_flt3:
input:
flt3_sc_jsons=expand(RUN.output("{sample}/itd-flt3/{sample}.flt3-sc.json"), sample=RUN.samples),
flt3_sc_plots=expand(RUN.output("{sample}/itd-flt3/{sample}.flt3-sc.png"), sample=RUN.samples),
flt3_sc_jsons=expand(RUN.output("{sample}/itd/{sample}.flt3-sc.json"), sample=RUN.samples),
flt3_sc_plots=expand(RUN.output("{sample}/itd/{sample}.flt3-sc.png"), sample=RUN.samples),
rule align_flt3:
input:
......@@ -23,12 +23,12 @@ rule align_flt3:
fq2=RUN.output("{sample}/{sample}-R2.fq.gz"),
index=RUN.settings["flt3_bwa_index"]
output:
bam=RUN.output("{sample}/itd-flt3/{sample}.flt3.bam"),
bai=RUN.output("{sample}/itd-flt3/{sample}.flt3.bai"),
bam=RUN.output("{sample}/itd/{sample}.flt3.bam"),
bai=RUN.output("{sample}/itd/{sample}.flt3.bai"),
params:
rg="@RG\\tID:{sample}\\tSM:{sample}"
threads: 3
conda: srcdir("envs/align_flt3.yml")
conda: srcdir("envs/align_itd.yml")
shell:
"bwa mem -R \'{params.rg}\' -t {threads} -L 2,2 {input.index} {input.fq1} {input.fq2}"
" | samtools view -Sbh -F 0x4 -"
......@@ -36,13 +36,13 @@ rule align_flt3:
rule detect_flt3:
input:
bam=RUN.output("{sample}/itd-flt3/{sample}.flt3.bam"),
bai=RUN.output("{sample}/itd-flt3/{sample}.flt3.bai"),
bam=RUN.output("{sample}/itd/{sample}.flt3.bam"),
bai=RUN.output("{sample}/itd/{sample}.flt3.bai"),
ref=RUN.settings["flt3_fasta"],
exe=RUN.settings["fidus_exe"],
output:
csv=RUN.output("{sample}/itd-flt3/{sample}.flt3.csv"),
bg_csv=RUN.output("{sample}/itd-flt3/{sample}.bg.csv"),
csv=RUN.output("{sample}/itd/{sample}.flt3.csv"),
bg_csv=RUN.output("{sample}/itd/{sample}.flt3.bg.csv"),
threads: 1
params:
trx_name=RUN.settings["flt3_name"],
......@@ -51,13 +51,13 @@ rule detect_flt3:
shell:
"{input.exe} -r {params.trx_name}:{params.start}-{params.end} --bg-counts {output.bg_csv} {input.ref} {input.bam} > {output.csv}"
rule plot_itd:
rule plot_itd_flt3:
input:
csv=RUN.output("{sample}/itd-flt3/{sample}.flt3.csv"),
bg_csv=RUN.output("{sample}/itd-flt3/{sample}.bg.csv"),
csv=RUN.output("{sample}/itd/{sample}.flt3.csv"),
bg_csv=RUN.output("{sample}/itd/{sample}.flt3.bg.csv"),
scr=RUN.settings["plot_itd"],
output:
png=RUN.output("{sample}/itd-flt3/{sample}.flt3.png"),
png=RUN.output("{sample}/itd/{sample}.flt3.png"),
threads: 1
conda: RUN.settings["plot_itd_conda"]
shell:
......
from os import path
from rattle import Run
# Run context built from the `config` object; the rules in this file read
# their settings, sample names, and output paths through it.
RUN = Run(config)
# Fallback values for the KMT2A transcript name and the ITD detection region.
# NOTE(review): presumably these only take effect when the setting is absent
# from the user configuration -- confirm against rattle's `set_default_setting`.
# Region of interest ~ exon 2-10 in transcript coordinates.
RUN.set_default_setting("kmt2a_name", "KMT2A-213")
RUN.set_default_setting("kmt2a_start", 456)
RUN.set_default_setting("kmt2a_end", 4719)
# Aggregate target: requests the KMT2A ITD results for every sample.
#
# The targets listed here are exactly the files produced by `detect_kmt2a`
# (ITD table and background counts) and `plot_itd_kmt2a` (plot). The previous
# `*.kmt2a-sc.json` / `*.kmt2a-sc.png` targets were not produced by any rule in
# this file, so requesting this rule could never be satisfied.
rule all_kmt2a:
    input:
        kmt2a_csvs=expand(
            RUN.output("{sample}/itd/{sample}.kmt2a.csv"),
            sample=RUN.samples,
        ),
        kmt2a_bg_csvs=expand(
            RUN.output("{sample}/itd/{sample}.kmt2a.bg.csv"),
            sample=RUN.samples,
        ),
        kmt2a_pngs=expand(
            RUN.output("{sample}/itd/{sample}.kmt2a.png"),
            sample=RUN.samples,
        ),
# Align a sample's paired-end reads against the KMT2A BWA index and write a
# coordinate-sorted BAM of the mapped reads, together with its index.
rule align_kmt2a:
    input:
        fq1=RUN.output("{sample}/{sample}-R1.fq.gz"),
        fq2=RUN.output("{sample}/{sample}-R2.fq.gz"),
        index=RUN.settings["kmt2a_bwa_index"]
    output:
        bam=RUN.output("{sample}/itd/{sample}.kmt2a.bam"),
        bai=RUN.output("{sample}/itd/{sample}.kmt2a.bai"),
    conda: srcdir("envs/align_itd.yml")
    threads: 3
    params:
        # Read group header line; the literal backslash-t sequences are
        # handed to `bwa mem -R` as written.
        rg="@RG\\tID:{sample}\\tSM:{sample}"
    shell:
        # Step 1: alignment, with the clipping penalty (-L) set to 2 for both
        # read ends.
        "bwa mem -R '{params.rg}' -t {threads} -L 2,2"
        " {input.index} {input.fq1} {input.fq2}"
        # Step 2: convert to BAM with header, excluding unmapped reads
        # (flag 0x4).
        " | samtools view -Sbh -F 0x4 -"
        # Step 3: coordinate sort; CREATE_INDEX is expected to write the
        # `.bai` declared in the outputs next to the BAM.
        " | picard SortSam I=/dev/stdin O={output.bam}"
        " SO=coordinate CREATE_INDEX=true"
# Run the `fidus` executable on the KMT2A alignment, restricted to the
# configured transcript region. The tool's standard output is captured as the
# result CSV; background counts go to a separate CSV.
rule detect_kmt2a:
    input:
        bam=RUN.output("{sample}/itd/{sample}.kmt2a.bam"),
        # The index is not passed on the command line; it is listed so that
        # it must exist before this rule runs.
        bai=RUN.output("{sample}/itd/{sample}.kmt2a.bai"),
        ref=RUN.settings["kmt2a_fasta"],
        exe=RUN.settings["fidus_exe"],
    output:
        csv=RUN.output("{sample}/itd/{sample}.kmt2a.csv"),
        bg_csv=RUN.output("{sample}/itd/{sample}.kmt2a.bg.csv"),
    params:
        # Region is rendered as `<name>:<start>-<end>` for the `-r` option.
        trx_name=RUN.settings["kmt2a_name"],
        start=RUN.settings["kmt2a_start"],
        end=RUN.settings["kmt2a_end"],
    threads: 1
    shell:
        "{input.exe} -r {params.trx_name}:{params.start}-{params.end}"
        " --bg-counts {output.bg_csv}"
        " {input.ref} {input.bam}"
        " > {output.csv}"
# Render the KMT2A ITD plot for a sample by calling the configured plotting
# script's `tracks` command on the detection CSV and the background-count CSV.
rule plot_itd_kmt2a:
    input:
        csv=RUN.output("{sample}/itd/{sample}.kmt2a.csv"),
        bg_csv=RUN.output("{sample}/itd/{sample}.kmt2a.bg.csv"),
        # Path to the plotting script, taken from the `plot_itd` setting.
        scr=RUN.settings["plot_itd"],
    output:
        png=RUN.output("{sample}/itd/{sample}.kmt2a.png"),
    conda: RUN.settings["plot_itd_conda"]
    threads: 1
    shell:
        "python {input.scr} tracks"
        " {input.csv} {input.bg_csv}"
        " --sample-id {wildcards.sample}"
        " {output.png}"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment