Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Beatrice Tan
CNAprioritization
Commits
e70aeb29
Commit
e70aeb29
authored
Feb 04, 2018
by
Beatrice Tan
Browse files
Added function to Report sizes.
parent
71025db0
Changes
1
Hide whitespace changes
Inline
Side-by-side
scripts/ReportSizes.py
View file @
e70aeb29
...
...
@@ -13,24 +13,34 @@ from collections import OrderedDict
def _convert_stats(tool, stat_list):
    """Return one row for plotting: the tool name followed by the converted statistics."""
    # The first two entries are carried over unchanged.
    converted_stats = [tool] + stat_list[0:2]
    # Entries 2-4 are converted to float directly.
    converted_stats.extend(float(stat) for stat in stat_list[2:5])
    # Remaining entries carry a " (...)" suffix; only the part before it is converted.
    converted_stats.extend(float(stat.split(" (")[0]) for stat in stat_list[5:])
    return converted_stats


def make_report(size_gistic, size_rubic_gains, size_rubic_losses, census_genes,
                known_genes, reps, sizes, ref_genome, report_file, plot_dir):
    """Make a report of the results produced using input files with different sample sizes.

    For every index in ``size_rubic_gains`` the GISTIC file and the RUBIC
    (gains, losses) pair at that index are parsed with ``parse_regions`` and
    summarised with ``get_stats``. The parsed results are grouped per tool and
    handed to ``overlap_genes``; the converted statistics are handed to
    ``make_plots``.

    Args:
        size_gistic: GISTIC result files, indexed in parallel with
            ``size_rubic_gains``.
        size_rubic_gains: RUBIC gains files. Each path must contain
            ``Size<size>.Rep<repetition>/gains``; the size is parsed from it.
        size_rubic_losses: RUBIC losses files, indexed in parallel with
            ``size_rubic_gains``.
        census_genes: passed through to ``parse_regions``.
        known_genes: passed through to ``parse_regions``.
        reps: passed through to ``make_plots``.
        sizes: passed through to ``make_plots``.
        ref_genome: not used by this function.
        report_file: path of the report; created/truncated here and passed to
            ``overlap_genes``.
        plot_dir: passed through to ``make_plots``.

    Raises:
        IndexError: if a gains path lacks the ``Size`` marker, or the GISTIC or
            losses list is shorter than the gains list.
        ValueError: if the size/repetition part does not contain exactly one
            ``.Rep`` separator, or a statistic cannot be converted to float.
    """
    list_stats = []
    all_results = {}

    # Mode 'w' creates or truncates the report file; nothing is written to it
    # here. NOTE(review): kept for its side effect only - confirm whether the
    # report is still meant to be emptied at this point.
    with open(report_file, 'w'):
        pass

    for i, gains_file in enumerate(size_rubic_gains):  # loop through sizes
        size_rep = gains_file.split("Size")[1].split("/gains")[0]
        # The repetition is not needed, but the two-way unpacking validates
        # that the path has the expected "<size>.Rep<repetition>" shape.
        size, _repetition = size_rep.split(".Rep")

        for tool in ('GISTIC', 'RUBIC'):
            if tool == 'RUBIC':
                size_file = (gains_file, size_rubic_losses[i])
            else:
                size_file = size_gistic[i]

            parsed_results = parse_regions(size_file, known_genes, census_genes, tool)
            all_results.setdefault(tool, []).append(parsed_results)

            stats_results = get_stats(parsed_results, size)
            # Only the first two stat lists returned by get_stats are reported
            # (presumably gains and losses - TODO confirm against get_stats).
            for stat_list in (stats_results[0], stats_results[1]):
                list_stats.append(_convert_stats(tool, stat_list))

    overlap_genes(all_results, report_file)
    make_plots(list_stats, reps, sizes, plot_dir)
def overlap_genes(all_results, report_file):
    """Print the collected results once for every tool in ``all_results``.

    Args:
        all_results: mapping keyed by tool name; printed as a whole.
        report_file: not used by this function.
    """
    # NOTE(review): the whole mapping is printed on every iteration rather
    # than the entry of the current tool - this looks like a placeholder;
    # confirm the intended output before relying on it.
    for _tool in all_results:
        print(all_results)
def
make_plots
(
list_stats
,
reps
,
sizes
,
plot_dir
):
plot_y_axis
=
([
'Number of recurrent regions'
,
'Average size of regions (Kb)'
,
'Total size (Mb)'
,
'Number of genes'
,
'Nr. regions with known genes'
,
'Nr. regions with census genes'
])
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment