Commit 25396c69 authored by Beatrice Tan's avatar Beatrice Tan
Browse files

Split recurrent regions rules into seperate files.

parent a6e291a4
......@@ -8,10 +8,12 @@ sys.path.insert(0, workflow.basedir + "/scripts")
#Rules to run pipeline for prioritization of regions and genes.
include: "rules/PreprocessInput.smk"
include: "rules/RecurrentRegions.smk"
include: "rules/GISTIC2.smk"
include: "rules/Rubic.smk"
include: "rules/GenePrioritization.smk"
#Rules to compare different inputs.
include: "rules/CopmarisonRegions.smk"
include: "rules/SampleSizes.smk"
include: "rules/UseControl.smk"
......@@ -50,3 +52,35 @@ rule help:
run:
for rule in workflow.rules:
print('- ' + rule.name + "\t" + rule.docstring)
rule report:
"""Write html report on segmentation file."""
input:
seg="Reports/Segments.txt",
tools="Reports/Tools.txt",
table_regions="Reports/Recurrent_regions.txt",
genes_both="Reports/Genes_both.txt",
genes_gistic="Reports/Genes_GISTIC2.txt",
genes_rubic="Reports/Genes_RUBIC.txt",
overlap="Reports/Overlap_genes.txt",
venn="Reports/Venn_overlap_genes.png",
swarmplot="Reports/Swarmplot_sizes.png",
circos="Reports/Circos/RecurrentRegions.png",
circos_legend="Reports/Circos/RecurrentRegions_legend.png"
output:
html="Reports/Results.html"
run:
from snakemake.utils import report
with open(input.seg, 'r') as seg:
nr_samples = seg.readline().split("\t")[1].strip()
report("""
====================================================
Report on the results of the prioritization pipeline
====================================================
In total, {nr_samples} samples were present in the raw segmentation file.
See: Table T1_
""", output.html, metadata="Beatrice F. Tan (beatrice.ftan@gmail.com)", T1=input[0])
#**input)
......@@ -7,3 +7,4 @@ dependencies:
- matplotlib-venn =0.11.5
- pyensembl =1.1.0
- seaborn =0.8.1
- pillow =5.0.0
from Rubic import MarkerFile
from Reports import ReportTools
import os.path
import datetime
from Circos import InputCircos
rule install_gistic:
"""Install GISTIC2 to a directory of choice."""
output:
config["gisticdir"]
shell:
"{workflow.basedir}/scripts/install_gistic2.sh {output}"
rule run_gistic:
"""Run GISTIC2 for the tumor segmentation data."""
input:
gistic_directory=config["gisticdir"],
seg="Input/Segments_tumor.txt"
output:
"GISTIC/all_lesions.conf_" + config["gistic_precision"] + ".txt"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
rule markers_rubic:
"""Make marker file to use as input for RUBIC based on segmentation file (start, center and end positions of each segment)."""
input:
"Input/Segments_tumor.txt"
output:
"Input/Markers.txt"
run:
MarkerFile(input[0], output[0])
rule run_rubic:
"""Run RUBIC for the tumor segmentation data."""
input:
seg="Input/Segments_tumor.txt",
markers="Input/Markers.txt"
output:
out_gains="RUBIC/gains.txt",
out_losses="RUBIC/losses.txt",
out_plots="RUBIC/plots"
params:
fdr="0.25",
genefile=os.path.join(workflow.basedir, config["biomart_genes"]) if config["biomart_genes"].startswith("input_files") else config["bimart_genes"]
benchmark:
"Benchmarks/RUBIC." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir +"/wrappers/rubic"
rule circos_input:
"""Make input files for making a circos diagram."""
input:
......@@ -83,6 +32,36 @@ rule make_circos:
shell:
"circos -conf {params[0]} -outputfile {output[0]} -param cnv_file={input.seg} -param gistic_file={input.gistic} -param rubic_file={input.rubic}"
rule make_legend_circos:
output:
"Reports/Circos/legend.png"
run:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
gains = mpatches.Patch(color='#74C476', label='Gains')
losses = mpatches.Patch(color='#FB6A4A', label='Losses')
gistic = mpatches.Patch(color='#5975A4', label='GISTIC2.0 regions')
rubic = mpatches.Patch(color='#5F9E6E', label='RUBIC regions')
legend = plt.legend(handles=[gains, losses, gistic, rubic], loc=3, framealpha=1, frameon=False)
fig = legend.figure
fig.canvas.draw()
bbox = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
fig.savefig(output[0], dpi=400, bbox_inches=bbox)
rule add_legend_circos:
input:
"Reports/Circos/RecurrentRegions.png",
"Reports/Circos/legend.png"
output:
"Reports/Circos/RecurrentRegions_legend.png"
run:
from PIL import Image
circos = Image.open(input[0], 'r')
c_w, c_h = circos.size
legend = Image.open(input[1], 'r')
l_w, l_h = legend.size
offset = ((c_w - l_w), 0) circos.paste(legend, offset)
circos.save(output[0])
rule report_tools:
"""Report the differences in calls between GISTIC2 and RUBIC."""
......@@ -111,32 +90,12 @@ rule report_tools:
output.swarmplot,
output.genes_both, output.genes_gistic, output.genes_rubic)
rule report:
"""Write html report on segmentation file."""
rule bed_intersect:
"""Intersect the recurrent regions detected by RUBIC and GISTIC2.0."""
input:
seg="Reports/Segments.txt",
tools="Reports/Tools.txt",
table_regions="Reports/Recurrent_regions.txt",
genes_both="Reports/Genes_both.txt",
genes_gistic="Reports/Genes_GISTIC2.txt",
genes_rubic="Reports/Genes_RUBIC.txt",
overlap="Reports/Overlap_genes.txt",
venn="Reports/Venn_overlap_genes.png",
swarmplot="Reports/Swarmplot_sizes.png"
gistic="GISTIC/regions_track.conf_" + config["gistic_precision"] + ".bed",
rubic="RUBIC/regions_track.bed"
output:
html="Reports/Results.html"
run:
from snakemake.utils import report
with open(input.seg, 'r') as seg:
nr_samples = seg.readline().split("\t")[1].strip()
report("""
====================================================
Report on the results of the prioritization pipeline
====================================================
In total, {nr_samples} samples were present in the raw segmentation file.
See: Table T1_
""", output.html, metadata="Beatrice F. Tan (beatrice.ftan@gmail.com)", T1=input[0])
#**input)
"Reports/Overlap_regions.bed"
shell:
"bedtools intersect -a {input.gistic} -b {input.rubic} -wo > {output}"
rule install_gistic:
"""Install GISTIC2 to a directory of choice."""
output:
config["gisticdir"]
shell:
"{workflow.basedir}/scripts/install_gistic2.sh {output}"
rule run_gistic:
"""Run GISTIC2 for the tumor segmentation data."""
input:
gistic_directory=config["gisticdir"],
seg="Input/Segments_tumor.txt"
output:
"GISTIC/all_lesions.conf_" + config["gistic_precision"] + ".txt"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
from Rubic import MarkerFile
rule markers_rubic:
"""Make marker file to use as input for RUBIC based on segmentation file (start, center and end positions of each segment)."""
input:
"Input/Segments_tumor.txt"
output:
"Input/Markers.txt"
run:
MarkerFile(input[0], output[0])
rule run_rubic:
"""Run RUBIC for the tumor segmentation data."""
input:
seg="Input/Segments_tumor.txt",
markers="Input/Markers.txt"
output:
out_gains="RUBIC/gains.txt",
out_losses="RUBIC/losses.txt",
out_plots="RUBIC/plots"
params:
fdr="0.25",
genefile=os.path.join(workflow.basedir, config["biomart_genes"]) if config["biomart_genes"].startswith("input_files") else config["bimart_genes"]
benchmark:
"Benchmarks/RUBIC." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir +"/wrappers/rubic"
......@@ -185,7 +185,7 @@ class ReportTools:
f, ax = plt.subplots(figsize=(7, 7))
ax.set(yscale="log")
g = sns.boxplot(x="Tool", y="Size", hue="CNV type", data=size_list, whis=np.inf, palette="deep")
g = sns.swarmplot(x="Tool", y="Size", split=True, hue="CNV type", data=size_list, size=5, palette={"Amplification": "#3f3f3f", "Deletion": "#3f3f3f"})
g = sns.swarmplot(x="Tool", y="Size", dodge=True, hue="CNV type", data=size_list, size=5, palette={"Amplification": "#3f3f3f", "Deletion": "#3f3f3f"})
sns.despine()
g.set_xlabel("Tool",fontsize=16)
g.set_ylabel("Log size of recurrent regions",fontsize=16)
......
......@@ -3,7 +3,7 @@
<<include ticks.conf>>
<image>
angle_offset* = -82
#angle_offset* = -82
<<include etc/image.conf>>
</image>
......@@ -91,7 +91,7 @@ stroke_thickness = 2
</rules>
</plot>
<plot> #histogram GISTIC
<plot> #GISTIC
show = yes
type = histogram
max_gap = 1u
......@@ -100,70 +100,58 @@ min = 0
max = 20
r0 = 0.05r
r1 = 0.615r
color = black_a4
#thickness = 2
fill_color = black_a4
color = 89,117,164,0.5
fill_color = 89,117,164,0.5
orientation = in
</plot>
<plot> #histogram RUBIC
show = yes
type = histogram
max_gap = 1u
file = conf(rubic_file)
color = 255,0,255,0.5
min = 0
max = 20
r0 = 0.05r
r1 = 0.615r
orientation = in
#thickness = 2
fill_color = 255,0,255,0.5
</plot>
# scatter plot for values [-3,3] turned into a heat map
# by using r0=r1
<plot>
type = scatter
file = conf(gistic_file)
r0 = 0.640r
r1 = 0.640r
min = 0
max = 30
max = 1000
glyph = square
glyph_size = 12
fill_color = black
fill_color = 89,117,164
<rules>
<rule>
condition = 1
fill_color = black #eval( "blue_a" . remap_int(var(value),0,30,0,30) )
fill_color = 89,117,164
</rule>
</rules>
</plot>
# scatter plot for values [0,3] turned into a heat map
# by using r0=r1
<plot> #RUBIC
show = yes
type = histogram
max_gap = 1u
file = conf(rubic_file)
color = 95,158,110,0.5
min = 0
max = 20
r0 = 0.05r
r1 = 0.615r
orientation = in
fill_color = 95,158,110,0.5
</plot>
<plot>
type = scatter
file = conf(rubic_file)
r0 = 0.625r
r1 = 0.625r
min = 0
max = 30
max = 1000
glyph = square
glyph_size = 12
fill_color = black
fill_color = 95,158,110
<rules>
<rule>
condition = 1
fill_color = 255,0,255 #eval( "gray_a" . remap_int(var(value),0,30,0,30) )
fill_color = 95,158,110
</rule>
</rules>
</plot>
......
......@@ -8,9 +8,9 @@ axis_break_at_edge = yes
axis_break = yes
axis_break_style = 2
<pairwise hs22 hs1>
spacing = 50r
</pairwise>
#<pairwise hs22 hs1>
#spacing = 10r
#</pairwise>
<break_style 1>
stroke_color = black
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment