Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Beatrice Tan
CNAprioritization
Commits
82ca2106
Commit
82ca2106
authored
Jan 31, 2018
by
BeatriceTan
Browse files
Fixed P-value plot.
parent
7f42daed
Changes
2
Hide whitespace changes
Inline
Side-by-side
rules/ComparisonRegions.smk
View file @
82ca2106
...
...
@@ -16,7 +16,6 @@ rule report_tools:
genes_both="Reports/Genes_both.txt",
genes_gistic="Reports/Genes_GISTIC2.txt",
genes_rubic="Reports/Genes_RUBIC.txt",
overlap="Reports/Overlap_genes.txt",
venn="Reports/Venn_overlap_genes.png",
swarmplot="Reports/Swarmplot_sizes.png",
pvals="Reports/Overlap_pvalues.png"
...
...
@@ -28,7 +27,7 @@ rule report_tools:
run:
ReportTools.make_report(input.gistic, input.rubic_gain, input.rubic_loss,
params.census, params.known, params.ref, params.biomart_info,
output.tools, output.table_regions, output.venn,
output.overlap,
output.swarmplot,
output.tools, output.table_regions, output.venn, output.swarmplot,
output.genes_both, output.genes_gistic, output.genes_rubic,
input.overlap, output.pvals)
...
...
scripts/ReportTools.py
View file @
82ca2106
...
...
@@ -9,10 +9,11 @@ import os.path
import
pyensembl
from
ParseResults
import
parse
,
get_stats
,
install_ensembl
from
collections
import
OrderedDict
import
math
def
make_report
(
gistic_results
,
rubic_gain_results
,
rubic_loss_results
,
census_genes
,
known_genes
,
ref_genome
,
biomart_file
,
file_tools
,
file_regions
,
file_venn
,
file_overlap
,
file_swarmplot
,
file_tools
,
file_regions
,
file_venn
,
file_swarmplot
,
file_genes_both
,
file_genes_GISTIC
,
file_genes_RUBIC
,
bed_overlap
,
file_pvalues
):
"""Make a report and gene file from the analyses with GISTIC2 and RUBIC"""
...
...
@@ -25,10 +26,10 @@ def make_report(gistic_results, rubic_gain_results, rubic_loss_results,
parsed
.
append
(
parsed_results
)
stats
.
append
(
stats_results
)
make_files
(
parsed
,
stats
,
file_tools
,
file_genes_GISTIC
,
file_genes_RUBIC
,
file_genes_both
,
file_venn
,
file_swarmplot
,
file_regions
,
biomart_file
,
file
_overlap
)
file_venn
,
file_swarmplot
,
file_regions
,
biomart_file
,
bed
_overlap
,
file_pvalues
)
def
make_files
(
parsed
,
stats
,
file_tools
,
file_genes_GISTIC
,
file_genes_RUBIC
,
file_genes_both
,
file_venn
,
file_swarmplot
,
file_regions
,
biomart_file
,
file_overlap
,
bed_overlap
,
file_pvalues
):
file_venn
,
file_swarmplot
,
file_regions
,
biomart_file
,
bed_overlap
,
file_pvalues
):
#overlap_most_sign(parsed)
table_regions
(
parsed
,
file_regions
)
make_tool_report
(
file_tools
,
stats
)
...
...
@@ -120,7 +121,7 @@ def plot_sizes(parsed_results, plot_file):
handles
,
labels
=
g
.
get_legend_handles_labels
()
plt
.
legend
(
handles
[
0
:
2
],
labels
[:
2
],
fontsize
=
12
)
plt
.
savefig
(
plot_file
,
dpi
=
300
)
plt
.
close
()
#
plt.close()
def
venn_overlap
(
gene_lists
,
out_venn
):
#maybe add known genes?
"""Compare the genes detected by GISTIC and those by RUBIC."""
...
...
@@ -136,7 +137,7 @@ def venn_overlap(gene_lists, out_venn): #maybe add known genes?
c
.
get_patch_by_id
(
'11'
).
set_edgecolor
(
'none'
)
c
.
get_patch_by_id
(
'11'
).
set_alpha
(
1
)
plt
.
savefig
(
out_venn
,
dpi
=
300
)
plt
.
close
()
#
plt.close()
##Wrong
...
...
@@ -200,23 +201,30 @@ def overlapping_pvalues(bed_file, parsed_results, plot_file):
with
open
(
bed_file
,
'r'
)
as
overlap
:
for
line
in
overlap
:
g_chr
,
g_start
,
g_end
,
g_nr
,
r_chr
,
r_start
,
r_end
,
r_nr
,
bp_overlap
=
line
.
strip
().
split
(
"
\t
"
)
for
gistic
,
rubic
in
parsed_results
:
for
region
in
gistic
:
if
region
[
0
:
3
]
==
[
g_chr
,
g_start
,
g_end
]:
g_pval
=
region
[
4
]
for
region
in
rubic
:
if
region
[
0
:
3
]
==
[
r_chr
,
r_start
,
r_end
]:
r_left_pv
,
r_right_pv
=
region
[
4
]
pvals
.
append
((
g_pval
,
r_left_pv
))
for
region
in
parsed_results
[
0
]:
if
region
[
0
:
3
]
==
[
g_chr
.
strip
(
"chr"
),
g_start
,
g_end
]:
g_pval
=
region
[
4
]
for
region
in
parsed_results
[
1
]:
if
region
[
0
:
3
]
==
[
r_chr
.
strip
(
"chr"
),
r_start
,
r_end
]:
r_left_pv
,
r_right_pv
=
region
[
4
]
r_pval
=
min
(
float
(
r_left_pv
),
float
(
r_right_pv
))
if
float
(
r_pval
)
>
0
:
gpv
=
math
.
pow
(
10
,
float
(
g_pval
)
*-
1
)
print
(
gpv
)
rpv
=
math
.
pow
(
10
,
float
(
r_pval
)
*-
1
)
print
(
rpv
)
#pvals.append((float(g_pval), float(r_left_pv)))
pvals
.
append
((
gpv
,
rpv
))
labels
=
[
"P-value GISTIC2.0"
,
"P-value RUBIC"
]
pval_df
=
pd
.
DataFrame
(
pvals
,
columns
=
labels
)
print
(
pval_df
)
plot_correlation
(
pval_df
,
plot_file
)
def
plot_correlation
(
df_pvals
,
plot_file
):
"""Plot correlation between P-values."""
g
=
sns
.
jointplot
(
"P-value GISTIC2.0"
,
"P-value RUBIC"
,
data
=
df_pvals
,
kind
=
"reg"
,
xlim
=
(
0
,
60
),
ylim
=
(
0
,
12
),
color
=
"r"
,
size
=
7
)
g
=
sns
.
jointplot
(
"P-value GISTIC2.0"
,
"P-value RUBIC"
,
data
=
df_pvals
,
kind
=
"reg"
,
color
=
"r"
,
size
=
7
,
xlim
=
(
0
,
0.25
),
ylim
=
(
0
,
0.25
))
pr
=
pearsonr
(
df_pvals
[
"P-value GISTIC2.0"
],
df_pvals
[
"P-value RUBIC"
])
print
(
pr
)
plt
.
text
(
0.9
,
0.1
,
pr
)
plt
.
savefig
(
plot_file
,
dpi
=
300
)
plt
.
close
()
#
plt.close()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment