Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Beatrice Tan
CNAprioritization
Commits
ea286e13
Commit
ea286e13
authored
Feb 19, 2018
by
Beatrice Tan
Browse files
Updated report.
parent
57c59da7
Changes
3
Show whitespace changes
Inline
Side-by-side
scripts/ParseResults.py
View file @
ea286e13
...
...
@@ -128,6 +128,7 @@ def get_stats(list_regions, used_tool):
return_info
=
[]
for
cnv_type
in
'amp'
,
'del'
:
tot_size
,
nr_genes
,
nr_reg
,
known_count
,
census_count
,
focal_count
=
0
,
0
,
0
,
0
,
0
,
0
known_census_count
,
known_focal
=
0
,
0
all_genes
=
[]
driver_density
=
[]
for
alt
in
list_regions
:
...
...
@@ -135,22 +136,23 @@ def get_stats(list_regions, used_tool):
nr_reg
+=
1
reg_size
=
int
(
alt
[
2
])
-
int
(
alt
[
1
])
tot_size
+=
reg_size
if
reg_size
<
3
000000
:
if
reg_size
<
10
000000
:
focal_count
+=
1
if
alt
[
5
]
!=
[]:
known_focal
+=
1
all_genes
=
all_genes
+
alt
[
7
]
if
alt
[
5
]
!=
[]:
known_count
+=
1
if
alt
[
6
]
!=
[]:
census_count
+=
1
#print(alt[7])
drivers
=
1
/
len
(
alt
[
7
])
if
alt
[
7
]
!=
[]
else
0
driver_density
.
append
(
drivers
)
nr_genes
=
len
(
list
(
set
(
all_genes
)))
#unique genes only
stats
=
calculate_stats
(
tot_size
,
nr_genes
,
nr_reg
,
known_count
,
census_count
,
focal_count
,
cnv_type
,
used_tool
,
driver_density
)
stats
=
calculate_stats
(
tot_size
,
nr_genes
,
nr_reg
,
known_count
,
census_count
,
focal_count
,
cnv_type
,
used_tool
,
driver_density
,
known_focal
)
return_info
.
append
(
stats
)
return
return_info
def
calculate_stats
(
tot_size
,
nr_genes
,
nr_reg
,
known_count
,
census_count
,
focal_count
,
cnv_type
,
used_tool
,
driver_density
):
def
calculate_stats
(
tot_size
,
nr_genes
,
nr_reg
,
known_count
,
census_count
,
focal_count
,
cnv_type
,
used_tool
,
driver_density
,
known_focal
):
"""Calculate average & total numbers for report on recurrent regions."""
type_name
=
'Amplifications'
if
cnv_type
==
'amp'
else
'Deletions'
avg_size
=
int
(
tot_size
/
nr_reg
/
1000
)
if
nr_reg
!=
0
else
'N/A'
#average size in kb
...
...
@@ -158,8 +160,8 @@ def calculate_stats(tot_size, nr_genes, nr_reg, known_count, census_count, focal
avg_driver_density
=
round
(
sum
(
driver_density
)
/
nr_reg
,
2
)
stats
=
[
used_tool
,
type_name
,
nr_reg
,
avg_size
,
round_tot_size
,
nr_genes
,
avg_driver_density
]
stats
=
list
(
map
(
str
,
stats
))
for
count
in
focal_count
,
known_count
,
census_count
:
for
count
in
focal_count
,
known_count
,
census_count
,
known_focal
:
percent
=
(
count
/
nr_reg
*
100
)
if
count
!=
0
else
0
stat_str
=
str
(
count
)
+
" ("
+
str
(
round
(
percent
,
2
))
+
"%)"
stat_str
=
str
(
count
)
+
" ("
+
str
(
int
(
percent
))
+
"%)"
stats
.
append
(
stat_str
)
return
stats
scripts/ReportSizes.py
View file @
ea286e13
...
...
@@ -20,11 +20,6 @@ def make_report(size_gistic, size_rubic_gains, size_rubic_losses, census_genes,
for
tool
in
'GISTIC'
,
'RUBIC'
:
size_file
=
(
size_rubic_gains
[
i
],
size_rubic_losses
[
i
])
if
tool
==
'RUBIC'
else
size_gistic
[
i
]
parsed_results
=
parse_regions
(
size_file
,
known_genes
,
census_genes
,
tool
,
ref_genome
)
#if tool not in all_results.keys():
# all_results[tool] = {}
# all_results[tool][size] = [parsed_results]
#else:
# all_results[tool][size] = all_results[tool][size] + [parsed_results]
stats_results
=
get_stats
(
parsed_results
,
size
)
for
stat_list
in
stats_results
[
0
],
stats_results
[
1
]:
converted_stats
=
[
tool
]
+
stat_list
[
0
:
2
]
...
...
@@ -50,8 +45,10 @@ def overlap_genes(all_results, report_file):
def
make_plots
(
list_stats
,
reps
,
sizes
,
plot_dir
):
plot_y_axis
=
([
'Number of recurrent regions'
,
'Average size of regions (Kb)'
,
'Total size (Mb)'
,
'Number of genes'
,
'Number of known regions'
,
'Number of census regions'
])
plot_y_axis
=
([
"No. regions"
,
"Avg. size (Kb)"
,
"Total size (Mb)"
,
"No. genes"
,
"Average driver density"
,
"No. focal regions"
,
"No. known regions"
,
"No. census regions"
,
"No. known focal regions"
])
# 'Number of recurrent regions', 'Average size of regions (Kb)', 'Total size (Mb)',
# 'Number of genes', 'Number of known regions', 'Number of census regions'])
df_stats
=
pd
.
DataFrame
(
list_stats
,
columns
=
[
'Tool'
,
'Sample size'
,
'Type'
]
+
plot_y_axis
)
for
plot_name
in
plot_y_axis
:
plot_size_differences
(
df_stats
,
plot_name
,
sizes
,
len
(
reps
),
plot_dir
)
...
...
@@ -63,7 +60,7 @@ def plot_size_differences(df_stats, value_y_axis, list_sizes, nr_reps, plot_dir)
sns
.
set_style
(
"whitegrid"
)
g
=
sns
.
factorplot
(
x
=
"Sample size"
,
y
=
value_y_axis
,
col
=
"Type"
,
hue
=
"Tool"
,
data
=
df_stats
,
kind
=
"box"
,
size
=
5
,
aspect
=
1
,
palette
=
[
"#5975A4"
,
"#5F9E6E"
],
order
=
list
(
map
(
str
,
list_sizes
)))
if
value_y_axis
in
[
'
Av
erage size of regions
(Kb)
'
,
'
Total size (Mb)
'
,
'Number of
genes
'
]:
if
value_y_axis
in
[
"
Av
g. size
(Kb)
"
,
"
Total size (Mb)
"
,
"No.
genes
"
]:
label_y_axis
=
'Log '
+
value_y_axis
[
0
].
lower
()
+
value_y_axis
[
1
:]
g
.
fig
.
get_axes
()[
0
].
set_yscale
(
'log'
)
else
:
...
...
scripts/ReportTools.py
View file @
ea286e13
...
...
@@ -29,14 +29,15 @@ def make_report(gistic_results, rubic_gain_results, rubic_loss_results,
parsed
.
append
(
parsed_results
)
stats_results
=
get_stats
(
parsed_results
,
tool
)
stats
.
append
(
stats_results
)
make_table_regions
(
parsed
[
0
:
2
],
file_regions
)
#make table with all recurrent regions
make_table_regions
(
parsed
[
0
:
2
],
file_regions
,
ref_genome
)
#make table with all recurrent regions
make_tool_report
(
file_tools
,
stats
)
#make tool report
extract_gene_lists
(
parsed
,
file_genes_GISTIC
,
file_genes_RUBIC
,
file_genes_both
,
known_genes
,
file_venn
,
ref_genome
)
#make gene files and venn overlap plot
plot_histogram_sizes
(
parsed
,
file_swarmplot
)
#plot histogram with the sizes of the regions
def
make_tool_report
(
file_tools
,
stats_tools
):
"""Make a report of the results from GISTIC and RUBIC."""
row_names
=
[
"Tool"
,
"Type"
,
"No. regions"
,
"Avg. size (Kb)"
,
"Total size (Mb)"
,
"No. genes"
,
"Average driver density"
,
"No. focal regions"
,
"No. known regions"
,
"No. census regions"
]
row_names
=
[
"Tool"
,
"Type"
,
"No. regions"
,
"Avg. size (Kb)"
,
"Total size (Mb)"
,
"No. genes"
,
"Average driver density"
,
"No. focal regions"
,
"No. known regions"
,
"No. census regions"
,
"No. known focal regions"
]
with
open
(
file_tools
,
'w'
)
as
out
:
for
i
in
range
(
len
(
row_names
)):
list_out
=
[
row_names
[
i
]]
...
...
@@ -45,10 +46,10 @@ def make_tool_report(file_tools, stats_tools):
list_out
=
list
(
map
(
str
,
list_out
))
out
.
write
(
"
\t
"
.
join
(
list_out
)
+
"
\n
"
)
def
make_table_regions
(
parsed_tools
,
file_regions
):
def
make_table_regions
(
parsed_tools
,
file_regions
,
ref_genome
):
"""Make a table with all recurrent regions detected by both tools."""
with
open
(
file_regions
,
'w'
)
as
out
:
header
=
[
"Method"
,
"Chr"
,
"Start"
,
"End"
,
"Type"
,
"Negative log10 q-value"
,
"Known genes"
,
"Census genes"
,
"
All
genes"
]
header
=
[
"Method"
,
"Chr"
,
"Start"
,
"End"
,
"Type"
,
"Negative log10 q-value"
,
"Known genes"
,
"Census genes"
,
"
Total number of
genes"
]
out
.
write
(
"
\t
"
.
join
(
header
)
+
"
\n
"
)
for
tool
in
parsed_tools
:
tool_name
=
"GISTIC"
if
tool
==
parsed_tools
[
0
]
else
"RUBIC"
...
...
@@ -58,8 +59,12 @@ def make_table_regions(parsed_tools, file_regions):
q_vals
=
", "
.
join
(
cnv
[
4
])
if
type
(
cnv
[
4
])
==
tuple
else
cnv
[
4
]
begin_row
=
cnv
[
0
:
3
]
+
[
cnv_type
,
str
(
q_vals
)]
out
.
write
(
"
\t
"
.
join
(
begin_row
))
for
list_genes
in
range
(
5
,
8
):
out
.
write
(
"
\t
"
+
", "
.
join
(
cnv
[
list_genes
]))
for
list_genes
in
range
(
5
,
7
):
out_genes
=
gene_ID_to_name
(
cnv
[
list_genes
],
ref_genome
)
out
.
write
(
"
\t
"
+
", "
.
join
(
out_genes
))
out
.
write
(
"
\t
"
+
str
(
len
(
cnv
[
7
])))
#for list_genes in range(5,8):
#out.write("\t" + ", ".join(cnv[list_genes]))
out
.
write
(
"
\n
"
)
def
extract_gene_lists
(
parsed_results
,
out_GISTIC
,
out_RUBIC
,
out_both
,
known_genes
,
out_venn
,
ref_genome
):
...
...
@@ -107,17 +112,16 @@ def venn3_overlap(gene_lists, known_genes_file, out_venn, ref_genome):
plt
.
subplots
(
figsize
=
(
7
,
7
))
venn_sets
=
[
set
(
gene_lists
[
0
]),
set
(
gene_lists
[
1
]),
set
(
known_genes
)]
c
=
venn3
(
venn_sets
,
(
'GISTIC'
,
'RUBIC'
,
' Known genes'
))
#c.get_patch_by_id('100').set_color('#5975A4'), c.get_patch_by_id('010').set_color('#5F9E6E')
#c.get_patch_by_id('001').set_color('#857AAA'), c.get_patch_by_id('011').set_color('#748A8F')
#c.get_patch_by_id('101').set_color('#6D77A7'), c.get_patch_by_id('110').set_color('#5C888B')
#c.get_patch_by_id('100').set_alpha(1), c.get_patch_by_id('010').set_alpha(1)
#c.get_patch_by_id('001').set_alpha(1), c.get_patch_by_id('011').set_alpha(1)
#c.get_patch_by_id('101').set_alpha(1), c.get_patch_by_id('110').set_alpha(1)
#try:
# c.get_patch_by_id('111').set_color('#B55D60')
# c.get_patch_by_id('111').set_alpha(1)
#except:
# pass #too small overlap from three groups to show the overlapping region
try
:
c
.
get_patch_by_id
(
'100'
).
set_color
(
'#5975A4'
),
c
.
get_patch_by_id
(
'100'
).
set_alpha
(
1
)
c
.
get_patch_by_id
(
'010'
).
set_color
(
'#5F9E6E'
),
c
.
get_patch_by_id
(
'010'
).
set_alpha
(
1
)
c
.
get_patch_by_id
(
'110'
).
set_color
(
'#5C888B'
),
c
.
get_patch_by_id
(
'110'
).
set_alpha
(
1
)
c
.
get_patch_by_id
(
'001'
).
set_color
(
'#857AAA'
),
c
.
get_patch_by_id
(
'001'
).
set_alpha
(
1
)
c
.
get_patch_by_id
(
'101'
).
set_color
(
'#6D77A7'
),
c
.
get_patch_by_id
(
'101'
).
set_alpha
(
1
)
c
.
get_patch_by_id
(
'011'
).
set_color
(
'#748A8F'
),
c
.
get_patch_by_id
(
'011'
).
set_alpha
(
1
)
c
.
get_patch_by_id
(
'111'
).
set_color
(
'#B55D60'
),
c
.
get_patch_by_id
(
'111'
).
set_alpha
(
1
)
except
:
pass
#too small overlap from three groups to show the overlapping region
plt
.
savefig
(
out_venn
,
dpi
=
300
)
def
plot_histogram_sizes
(
parsed_results
,
plot_file
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment