Commit 82ca2106 authored by BeatriceTan's avatar BeatriceTan

Fixed P-value plot.

parent 7f42daed
......@@ -16,7 +16,6 @@ rule report_tools:
genes_both="Reports/Genes_both.txt",
genes_gistic="Reports/Genes_GISTIC2.txt",
genes_rubic="Reports/Genes_RUBIC.txt",
overlap="Reports/Overlap_genes.txt",
venn="Reports/Venn_overlap_genes.png",
swarmplot="Reports/Swarmplot_sizes.png",
pvals="Reports/Overlap_pvalues.png"
......@@ -28,7 +27,7 @@ rule report_tools:
run:
ReportTools.make_report(input.gistic, input.rubic_gain, input.rubic_loss,
params.census, params.known, params.ref, params.biomart_info,
output.tools, output.table_regions, output.venn, output.overlap, output.swarmplot,
output.tools, output.table_regions, output.venn, output.swarmplot,
output.genes_both, output.genes_gistic, output.genes_rubic,
input.overlap, output.pvals)
......
......@@ -9,10 +9,11 @@ import os.path
import pyensembl
from ParseResults import parse, get_stats, install_ensembl
from collections import OrderedDict
import math
def make_report(gistic_results, rubic_gain_results, rubic_loss_results,
census_genes, known_genes, ref_genome, biomart_file,
file_tools, file_regions, file_venn, file_overlap, file_swarmplot,
file_tools, file_regions, file_venn, file_swarmplot,
file_genes_both, file_genes_GISTIC, file_genes_RUBIC,
bed_overlap, file_pvalues):
"""Make a report and gene file from the analyses with GISTIC2 and RUBIC"""
......@@ -25,10 +26,10 @@ def make_report(gistic_results, rubic_gain_results, rubic_loss_results,
parsed.append(parsed_results)
stats.append(stats_results)
make_files(parsed, stats, file_tools, file_genes_GISTIC, file_genes_RUBIC, file_genes_both,
file_venn, file_swarmplot, file_regions, biomart_file, file_overlap)
file_venn, file_swarmplot, file_regions, biomart_file, bed_overlap, file_pvalues)
def make_files(parsed, stats, file_tools, file_genes_GISTIC, file_genes_RUBIC, file_genes_both,
file_venn, file_swarmplot, file_regions, biomart_file, file_overlap, bed_overlap, file_pvalues):
file_venn, file_swarmplot, file_regions, biomart_file, bed_overlap, file_pvalues):
#overlap_most_sign(parsed)
table_regions(parsed, file_regions)
make_tool_report(file_tools, stats)
......@@ -120,7 +121,7 @@ def plot_sizes(parsed_results, plot_file):
handles, labels = g.get_legend_handles_labels()
plt.legend(handles[0:2], labels[:2], fontsize=12)
plt.savefig(plot_file, dpi=300)
plt.close()
#plt.close()
def venn_overlap(gene_lists, out_venn): #maybe add known genes?
"""Compare the genes detected by GISTIC and those by RUBIC."""
......@@ -136,7 +137,7 @@ def venn_overlap(gene_lists, out_venn): #maybe add known genes?
c.get_patch_by_id('11').set_edgecolor('none')
c.get_patch_by_id('11').set_alpha(1)
plt.savefig(out_venn, dpi=300)
plt.close()
#plt.close()
##Wrong
......@@ -200,23 +201,30 @@ def overlapping_pvalues(bed_file, parsed_results, plot_file):
with open(bed_file, 'r') as overlap:
for line in overlap:
g_chr, g_start, g_end, g_nr, r_chr, r_start, r_end, r_nr, bp_overlap = line.strip().split("\t")
for gistic, rubic in parsed_results:
for region in gistic:
if region[0:3] == [g_chr, g_start, g_end]:
g_pval = region[4]
for region in rubic:
if region[0:3] == [r_chr, r_start, r_end]:
r_left_pv, r_right_pv = region[4]
pvals.append((g_pval, r_left_pv))
for region in parsed_results[0]:
if region[0:3] == [g_chr.strip("chr"), g_start, g_end]:
g_pval = region[4]
for region in parsed_results[1]:
if region[0:3] == [r_chr.strip("chr"), r_start, r_end]:
r_left_pv, r_right_pv = region[4]
r_pval = min(float(r_left_pv), float(r_right_pv))
if float(r_pval) > 0:
gpv = math.pow(10, float(g_pval)*-1)
print(gpv)
rpv = math.pow(10, float(r_pval)*-1)
print(rpv)
#pvals.append((float(g_pval), float(r_left_pv)))
pvals.append((gpv, rpv))
labels = ["P-value GISTIC2.0", "P-value RUBIC"]
pval_df = pd.DataFrame(pvals, columns=labels)
print(pval_df)
plot_correlation(pval_df, plot_file)
def plot_correlation(df_pvals, plot_file):
"""Plot correlation between P-values."""
g = sns.jointplot("P-value GISTIC2.0", "P-value RUBIC", data=df_pvals, kind="reg",
xlim=(0, 60), ylim=(0, 12), color="r", size=7)
g = sns.jointplot("P-value GISTIC2.0", "P-value RUBIC", data=df_pvals, kind="reg", color="r", size=7, xlim=(0,0.25), ylim=(0,0.25))
pr = pearsonr(df_pvals["P-value GISTIC2.0"], df_pvals["P-value RUBIC"])
print(pr)
plt.text(0.9, 0.1, pr)
plt.savefig(plot_file, dpi=300)
plt.close()
#plt.close()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment