Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Beatrice Tan
CNAprioritization
Commits
25396c69
Commit
25396c69
authored
Jan 30, 2018
by
Beatrice Tan
Browse files
Split recurrent regions rules into seperate files.
parent
a6e291a4
Changes
8
Hide whitespace changes
Inline
Side-by-side
Snakefile
View file @
25396c69
...
...
@@ -8,10 +8,12 @@ sys.path.insert(0, workflow.basedir + "/scripts")
#Rules to run pipeline for prioritization of regions and genes.
include: "rules/PreprocessInput.smk"
include: "rules/RecurrentRegions.smk"
include: "rules/GISTIC2.smk"
include: "rules/Rubic.smk"
include: "rules/GenePrioritization.smk"
#Rules to compare different inputs.
include: "rules/CopmarisonRegions.smk"
include: "rules/SampleSizes.smk"
include: "rules/UseControl.smk"
...
...
@@ -50,3 +52,35 @@ rule help:
run:
for rule in workflow.rules:
print('- ' + rule.name + "\t" + rule.docstring)
rule report:
"""Write html report on segmentation file."""
input:
seg="Reports/Segments.txt",
tools="Reports/Tools.txt",
table_regions="Reports/Recurrent_regions.txt",
genes_both="Reports/Genes_both.txt",
genes_gistic="Reports/Genes_GISTIC2.txt",
genes_rubic="Reports/Genes_RUBIC.txt",
overlap="Reports/Overlap_genes.txt",
venn="Reports/Venn_overlap_genes.png",
swarmplot="Reports/Swarmplot_sizes.png",
circos="Reports/Circos/RecurrentRegions.png",
circos_legend="Reports/Circos/RecurrentRegions_legend.png"
output:
html="Reports/Results.html"
run:
from snakemake.utils import report
with open(input.seg, 'r') as seg:
nr_samples = seg.readline().split("\t")[1].strip()
report("""
====================================================
Report on the results of the prioritization pipeline
====================================================
In total, {nr_samples} samples were present in the raw segmentation file.
See: Table T1_
""", output.html, metadata="Beatrice F. Tan (beatrice.ftan@gmail.com)", T1=input[0])
#**input)
envs/pipeline.yaml
View file @
25396c69
...
...
@@ -7,3 +7,4 @@ dependencies:
-
matplotlib-venn =0.11.5
-
pyensembl =1.1.0
-
seaborn =0.8.1
-
pillow =5.0.0
rules/
Recurrent
Regions.smk
→
rules/
Comparison
Regions.smk
View file @
25396c69
from Rubic import MarkerFile
from Reports import ReportTools
import os.path
import datetime
from Circos import InputCircos
rule install_gistic:
"""Install GISTIC2 to a directory of choice."""
output:
config["gisticdir"]
shell:
"{workflow.basedir}/scripts/install_gistic2.sh {output}"
rule run_gistic:
"""Run GISTIC2 for the tumor segmentation data."""
input:
gistic_directory=config["gisticdir"],
seg="Input/Segments_tumor.txt"
output:
"GISTIC/all_lesions.conf_" + config["gistic_precision"] + ".txt"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
rule markers_rubic:
"""Make marker file to use as input for RUBIC based on segmentation file (start, center and end positions of each segment)."""
input:
"Input/Segments_tumor.txt"
output:
"Input/Markers.txt"
run:
MarkerFile(input[0], output[0])
rule run_rubic:
"""Run RUBIC for the tumor segmentation data."""
input:
seg="Input/Segments_tumor.txt",
markers="Input/Markers.txt"
output:
out_gains="RUBIC/gains.txt",
out_losses="RUBIC/losses.txt",
out_plots="RUBIC/plots"
params:
fdr="0.25",
genefile=os.path.join(workflow.basedir, config["biomart_genes"]) if config["biomart_genes"].startswith("input_files") else config["bimart_genes"]
benchmark:
"Benchmarks/RUBIC." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir +"/wrappers/rubic"
rule circos_input:
"""Make input files for making a circos diagram."""
input:
...
...
@@ -83,6 +32,36 @@ rule make_circos:
shell:
"circos -conf {params[0]} -outputfile {output[0]} -param cnv_file={input.seg} -param gistic_file={input.gistic} -param rubic_file={input.rubic}"
rule make_legend_circos:
output:
"Reports/Circos/legend.png"
run:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
gains = mpatches.Patch(color='#74C476', label='Gains')
losses = mpatches.Patch(color='#FB6A4A', label='Losses')
gistic = mpatches.Patch(color='#5975A4', label='GISTIC2.0 regions')
rubic = mpatches.Patch(color='#5F9E6E', label='RUBIC regions')
legend = plt.legend(handles=[gains, losses, gistic, rubic], loc=3, framealpha=1, frameon=False)
fig = legend.figure
fig.canvas.draw()
bbox = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
fig.savefig(output[0], dpi=400, bbox_inches=bbox)
rule add_legend_circos:
input:
"Reports/Circos/RecurrentRegions.png",
"Reports/Circos/legend.png"
output:
"Reports/Circos/RecurrentRegions_legend.png"
run:
from PIL import Image
circos = Image.open(input[0], 'r')
c_w, c_h = circos.size
legend = Image.open(input[1], 'r')
l_w, l_h = legend.size
offset = ((c_w - l_w), 0) circos.paste(legend, offset)
circos.save(output[0])
rule report_tools:
"""Report the differences in calls between GISTIC2 and RUBIC."""
...
...
@@ -111,32 +90,12 @@ rule report_tools:
output.swarmplot,
output.genes_both, output.genes_gistic, output.genes_rubic)
rule report:
"""Write html report on segmentation file."""
rule bed_intersect:
"""Intersect the recurrent regions detected by RUBIC and GISTIC2.0."""
input:
seg="Reports/Segments.txt",
tools="Reports/Tools.txt",
table_regions="Reports/Recurrent_regions.txt",
genes_both="Reports/Genes_both.txt",
genes_gistic="Reports/Genes_GISTIC2.txt",
genes_rubic="Reports/Genes_RUBIC.txt",
overlap="Reports/Overlap_genes.txt",
venn="Reports/Venn_overlap_genes.png",
swarmplot="Reports/Swarmplot_sizes.png"
gistic="GISTIC/regions_track.conf_" + config["gistic_precision"] + ".bed",
rubic="RUBIC/regions_track.bed"
output:
html="Reports/Results.html"
run:
from snakemake.utils import report
with open(input.seg, 'r') as seg:
nr_samples = seg.readline().split("\t")[1].strip()
report("""
====================================================
Report on the results of the prioritization pipeline
====================================================
In total, {nr_samples} samples were present in the raw segmentation file.
See: Table T1_
""", output.html, metadata="Beatrice F. Tan (beatrice.ftan@gmail.com)", T1=input[0])
#**input)
"Reports/Overlap_regions.bed"
shell:
"bedtools intersect -a {input.gistic} -b {input.rubic} -wo > {output}"
rules/GISTIC2.smk
0 → 100644
View file @
25396c69
rule install_gistic:
"""Install GISTIC2 to a directory of choice."""
output:
config["gisticdir"]
shell:
"{workflow.basedir}/scripts/install_gistic2.sh {output}"
rule run_gistic:
"""Run GISTIC2 for the tumor segmentation data."""
input:
gistic_directory=config["gisticdir"],
seg="Input/Segments_tumor.txt"
output:
"GISTIC/all_lesions.conf_" + config["gistic_precision"] + ".txt"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
rules/Rubic.smk
0 → 100644
View file @
25396c69
from Rubic import MarkerFile
rule markers_rubic:
"""Make marker file to use as input for RUBIC based on segmentation file (start, center and end positions of each segment)."""
input:
"Input/Segments_tumor.txt"
output:
"Input/Markers.txt"
run:
MarkerFile(input[0], output[0])
rule run_rubic:
"""Run RUBIC for the tumor segmentation data."""
input:
seg="Input/Segments_tumor.txt",
markers="Input/Markers.txt"
output:
out_gains="RUBIC/gains.txt",
out_losses="RUBIC/losses.txt",
out_plots="RUBIC/plots"
params:
fdr="0.25",
genefile=os.path.join(workflow.basedir, config["biomart_genes"]) if config["biomart_genes"].startswith("input_files") else config["bimart_genes"]
benchmark:
"Benchmarks/RUBIC." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir +"/wrappers/rubic"
scripts/Reports.py
View file @
25396c69
...
...
@@ -185,7 +185,7 @@ class ReportTools:
f
,
ax
=
plt
.
subplots
(
figsize
=
(
7
,
7
))
ax
.
set
(
yscale
=
"log"
)
g
=
sns
.
boxplot
(
x
=
"Tool"
,
y
=
"Size"
,
hue
=
"CNV type"
,
data
=
size_list
,
whis
=
np
.
inf
,
palette
=
"deep"
)
g
=
sns
.
swarmplot
(
x
=
"Tool"
,
y
=
"Size"
,
split
=
True
,
hue
=
"CNV type"
,
data
=
size_list
,
size
=
5
,
palette
=
{
"Amplification"
:
"#3f3f3f"
,
"Deletion"
:
"#3f3f3f"
})
g
=
sns
.
swarmplot
(
x
=
"Tool"
,
y
=
"Size"
,
dodge
=
True
,
hue
=
"CNV type"
,
data
=
size_list
,
size
=
5
,
palette
=
{
"Amplification"
:
"#3f3f3f"
,
"Deletion"
:
"#3f3f3f"
})
sns
.
despine
()
g
.
set_xlabel
(
"Tool"
,
fontsize
=
16
)
g
.
set_ylabel
(
"Log size of recurrent regions"
,
fontsize
=
16
)
...
...
scripts/circos/circos.conf
View file @
25396c69
...
...
@@ -3,7 +3,7 @@
<<
include
ticks
.
conf
>>
<
image
>
angle_offset
* = -
82
#
angle_offset* = -82
<<
include
etc
/
image
.
conf
>>
</
image
>
...
...
@@ -91,7 +91,7 @@ stroke_thickness = 2
</
rules
>
</
plot
>
<
plot
>
#
histogram
GISTIC
<
plot
>
#GISTIC
show
=
yes
type
=
histogram
max_gap
=
1
u
...
...
@@ -100,70 +100,58 @@ min = 0
max
=
20
r0
=
0
.
05
r
r1
=
0
.
615
r
color
=
black_a4
#thickness = 2
fill_color
=
black_a4
color
=
89
,
117
,
164
,
0
.
5
fill_color
=
89
,
117
,
164
,
0
.
5
orientation
=
in
</
plot
>
<
plot
>
#histogram RUBIC
show
=
yes
type
=
histogram
max_gap
=
1
u
file
=
conf
(
rubic_file
)
color
=
255
,
0
,
255
,
0
.
5
min
=
0
max
=
20
r0
=
0
.
05
r
r1
=
0
.
615
r
orientation
=
in
#thickness = 2
fill_color
=
255
,
0
,
255
,
0
.
5
</
plot
>
# scatter plot for values [-3,3] turned into a heat map
# by using r0=r1
<
plot
>
type
=
scatter
file
=
conf
(
gistic_file
)
r0
=
0
.
640
r
r1
=
0
.
640
r
min
=
0
max
=
3
0
max
=
100
0
glyph
=
square
glyph_size
=
12
fill_color
=
black
fill_color
=
89
,
117
,
164
<
rules
>
<
rule
>
condition
=
1
fill_color
=
black
#eval( "blue_a" . remap_int(var(value),0,30,0,30) )
fill_color
=
89
,
117
,
164
</
rule
>
</
rules
>
</
plot
>
# scatter plot for values [0,3] turned into a heat map
# by using r0=r1
<
plot
>
#RUBIC
show
=
yes
type
=
histogram
max_gap
=
1
u
file
=
conf
(
rubic_file
)
color
=
95
,
158
,
110
,
0
.
5
min
=
0
max
=
20
r0
=
0
.
05
r
r1
=
0
.
615
r
orientation
=
in
fill_color
=
95
,
158
,
110
,
0
.
5
</
plot
>
<
plot
>
type
=
scatter
file
=
conf
(
rubic_file
)
r0
=
0
.
625
r
r1
=
0
.
625
r
min
=
0
max
=
3
0
max
=
100
0
glyph
=
square
glyph_size
=
12
fill_color
=
black
fill_color
=
95
,
158
,
110
<
rules
>
<
rule
>
condition
=
1
fill_color
=
255
,
0
,
255
#eval( "gray_a" . remap_int(var(value),0,30,0,30) )
fill_color
=
95
,
158
,
110
</
rule
>
</
rules
>
</
plot
>
...
...
scripts/circos/ideogram.conf
View file @
25396c69
...
...
@@ -8,9 +8,9 @@ axis_break_at_edge = yes
axis_break
=
yes
axis_break_style
=
2
<
pairwise
hs22
hs1
>
spacing
=
5
0
r
</
pairwise
>
#
<pairwise hs22 hs1>
#
spacing =
1
0r
#
</pairwise>
<
break_style
1
>
stroke_color
=
black
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment