Commit e70aeb29 authored by Beatrice Tan

Added function to Report sizes.

parent 71025db0
......@@ -13,24 +13,34 @@ from collections import OrderedDict
def make_report(size_gistic, size_rubic_gains, size_rubic_losses, census_genes, known_genes, reps, sizes, ref_genome, report_file, plot_dir):
    """Make a report of the results produced using input files with different sample sizes.

    For every entry of ``size_rubic_gains`` the GISTIC and RUBIC results are
    parsed with ``parse_regions`` and summarised with ``get_stats``. The parsed
    results are collected per tool and handed to ``overlap_genes``; the
    numeric statistics are handed to ``make_plots``.

    Args:
        size_gistic: Indexable collection of GISTIC result files, aligned by
            position with ``size_rubic_gains``.
        size_rubic_gains: RUBIC gains files. Each path must contain
            ``Size<size>.Rep<repetition>/gains``; the size is extracted from it.
        size_rubic_losses: RUBIC losses files, aligned by position with
            ``size_rubic_gains``.
        census_genes: Passed through to ``parse_regions``.
        known_genes: Passed through to ``parse_regions``.
        reps: Passed through to ``make_plots``.
        sizes: Passed through to ``make_plots``.
        ref_genome: Not used by this function.
        report_file: Passed through to ``overlap_genes``.
        plot_dir: Passed through to ``make_plots``.
    """
    list_stats = []
    all_results = {}
    for i, gains_file in enumerate(size_rubic_gains):  # loop through sizes
        # Path is expected to look like "...Size<size>.Rep<rep>/gains...".
        size_rep = gains_file.split("Size")[1].split("/gains")[0]
        # The repetition is unpacked (so a malformed name still raises) but not used.
        size, _repetition = size_rep.split(".Rep")
        for tool in 'GISTIC', 'RUBIC':
            # RUBIC is parsed from a (gains, losses) pair, GISTIC from a single file.
            if tool == 'RUBIC':
                size_file = (gains_file, size_rubic_losses[i])
            else:
                size_file = size_gistic[i]
            parsed_results = parse_regions(size_file, known_genes, census_genes, tool)
            all_results.setdefault(tool, []).append(parsed_results)
            stats_results = get_stats(parsed_results, size)
            for stat_list in stats_results[0], stats_results[1]:
                # First two fields are kept as-is, the next three are plain
                # numbers, and the rest look like "<value> (<extra>)" of which
                # only the leading value is kept.
                converted_stats = [tool] + stat_list[0:2]
                converted_stats.extend(float(stat) for stat in stat_list[2:5])
                converted_stats.extend(float(stat.split(" (")[0]) for stat in stat_list[5:])
                list_stats.append(converted_stats)
    overlap_genes(all_results, report_file)
    make_plots(list_stats, reps, sizes, plot_dir)
def overlap_genes(all_results, report_file):
    """Print the collected results once for every tool present in them.

    Args:
        all_results: Mapping of tool name to its list of parsed results.
        report_file: Not used by this function yet.
    """
    # NOTE(review): the whole mapping is printed on each iteration, not the
    # per-tool entry; this looks like debugging scaffolding -- confirm intent.
    for _tool in all_results:
        print(all_results)
def make_plots(list_stats, reps, sizes, plot_dir):
plot_y_axis = (['Number of recurrent regions', 'Average size of regions (Kb)', 'Total size (Mb)',
'Number of genes', 'Nr. regions with known genes', 'Nr. regions with census genes'])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment