Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Beatrice Tan
CNAprioritization
Commits
4592528e
Commit
4592528e
authored
Jan 28, 2018
by
Beatrice Tan
Browse files
Updated report on sample sizes.
parent
7c36cd45
Changes
6
Hide whitespace changes
Inline
Side-by-side
Snakefile
View file @
4592528e
...
...
@@ -37,11 +37,11 @@ onerror:
rule all:
"""Define desired output from pipeline."""
input:
"Samplesize/Report.txt",
#"Reports/Circos/RecurrentRegions.png"
#"GO/comparison.txt",
#"Input/Segments_tumor.txt",
#"Reports/Control.txt",
"Reports/Tools.txt",
#
"Reports/Tools.txt",
#"Reports/Results.html",
#"Reports/Control.txt",
...
...
config.yaml
View file @
4592528e
...
...
@@ -16,7 +16,17 @@ prev_found_genes: input_files/intogen-CM-drivers-data.tsv
census_genes
:
input_files/Census_genes.txt
biomart_genes
:
input_files/biomart_human_genes.tsv
precision
:
'
99'
#Settings GISTIC2.0
gistic_precision
:
"
99"
settings_gistic
:
"
-ta
0.1
-td
0.1
-qvt
0.25
-brlen
0.7
-cap
1.5
-rx
1
-genegistic
1"
#Settings for sample size differences
sizes
:
[
20
,
30
,
40
,
50
,
60
,
70
,
80
,
90
]
...
...
rules/RecurrentRegions.smk
View file @
4592528e
...
...
@@ -17,12 +17,12 @@ rule run_gistic:
gistic_directory=config["gisticdir"],
seg="Input/Segments_tumor.txt"
output:
"GISTIC/all_lesions.conf_" + config["precision"] + ".txt"
"GISTIC/all_lesions.conf_" + config["
gistic_
precision"] + ".txt"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=
""
extra=
config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt" #space between date and time!
wrapper:
...
...
@@ -85,7 +85,7 @@ rule make_circos:
rule report_tools:
"""Report the differences in calls between GISTIC2 and RUBIC."""
input:
gistic="GISTIC/all_lesions.conf_" + config["precision"] + ".txt",
gistic="GISTIC/all_lesions.conf_" + config["
gistic_
precision"] + ".txt",
rubic_gain="RUBIC/gains.txt",
rubic_loss="RUBIC/losses.txt"
output:
...
...
rules/SampleSizes.smk
View file @
4592528e
...
...
@@ -5,55 +5,51 @@ from Rubic import MarkerFile
rule seg_subsets:
"""Create segmentation files with different numbers of samples (randomly chosen) for a number of times."""
input:
config["inputfile"] + "_new
"
"Input/Segments_tumor.txt
"
output:
expand("Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt", rand_nr=config["sizes"], rep_nr=config["repeats"])
run:
SegFile(input[0], output[0])
rule gistic_subsets:
rule
run_
gistic_subsets:
"""Run GISTIC2 for the segmentation files with different subsets."""
input:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt"
,
cnv="
"
gistic_directory=config["gisticdir"]
,
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt
"
output:
"Samplesize/GISTIC
2
/Size{rand_nr}.Rep{rep_nr}/"
"Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/
all_lesions.conf_" + config["gistic_precision"] + ".txt
"
params:
cnv="",
gistic_directory=config["gisticdir"],
ref=config["reference"],
ref_file="",
extra=
""
extra=
config["settings_gistic"]
wrapper:
"file:" +
os.path.join(
workflow.basedir
,
"wrappers/GISTIC2"
)
"file:" + workflow.basedir
+
"
/
wrappers/GISTIC2"
rule seg_rubic_subsets:
"""Create segmentation files to use as input for RUBIC."""
input:
"Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt"
output:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}_rubic.txt",
marker="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}_marker.txt"
run:
get_seg_rubic(input[0], output.seg, output.marker)
rule rubic_subsets:
rule run_rubic_subsets:
"""Run RUBIC for the segmentation files with different subsets."""
input:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}
_rubic
.txt",
markers="
Samplesize/Input/Size{rand_nr}.Rep{rep_nr}_m
arker.txt"
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt",
markers="
Input/M
arker
s
.txt"
output:
"Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/"
out_gains="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt",
out_losses="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/losses.txt",
out_plots="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/plots"
params:
fdr="0.25",
genefile=config["biomart_genes"]
genefile=os.path.join(workflow.basedir, config["biomart_genes"]) if config["biomart_genes"].startswith("input_files") else config["bimart_genes"]
benchmark:
"Benchmarks/RUBIC." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" +
os.path.join(
workflow.basedir
, "
wrappers/rubic"
)
"file:" + workflow.basedir
+"/
wrappers/rubic"
rule report_sizes:
"""Report the difference when using different sample sizes."""
input:
gistic=expand("Samplesize/GISTIC2/Size{rand_nr}.Rep{rep_nr}/", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/", rand_nr=config["sizes"], rep_nr=config["repeats"])
gistic=expand("Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/all_lesions.conf_" + config["gistic_precision"] + ".txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic_gains=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic_losses=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/losses.txt", rand_nr=config["sizes"], rep_nr=config["repeats"])
output:
report="Samplesize/Report.txt",
plots="Samplesize/Plots/"
...
...
scripts/Reports.py
View file @
4592528e
...
...
@@ -293,15 +293,16 @@ class ReportSizes:
with
open
(
report_file
,
'w'
)
as
out
:
dict_stats
=
OrderedDict
()
row_names
=
([
"Size"
,
"Type"
,
"Nr. regions"
,
"Avg. size (Kb)"
,
"Total size (Mb)"
,
"Nr. genes"
,
"Nr. regions with census genes"
,
"Nr. regions with known genes"
])
"Nr. regions with census genes"
,
"Nr. regions with known genes"
,
"Nr. genes"
])
tool
=
GISTIC2
for
size_file
in
size_results
:
parsed_gistic
=
parse
().
gistic_results
(
size_file
)
parsed_gistic
=
parse
d_results
=
parse
(
size_file
,
known_genes
,
census_genes
,
tool
)
size
,
repetition
=
size_file
.
split
(
"/"
)[
-
1
].
split
(
"x"
)
stats_
gistic
=
self
.
calculate_stats
(
parsed_gistic
,
census_genes
,
known_gene
s
,
size
)
stats_
results
=
get_stats
(
parsed_result
s
,
size
)
if
size
not
in
dict_stats
.
keys
():
dict_stats
[
size
]
=
[
stats_
gistic
]
dict_stats
[
size
]
=
[
stats_
results
]
else
:
dict_stats
[
size
]
=
dict_stats
[
size
]
+
[
stats_
gistic
]
dict_stats
[
size
]
=
dict_stats
[
size
]
+
[
stats_
results
]
out
.
write
(
dict_stats
)
self
.
extract_stats_sizes
(
dict_stats
,
reps
,
plot_dir
)
...
...
@@ -318,9 +319,9 @@ class ReportSizes:
if
reg_size
!=
'N/A'
:
avg_size
.
append
(
float
(
reg_size
))
total_size
.
append
(
float
(
stats
[
4
]))
nr_genes
.
append
(
float
(
stats
[
5
]))
nr_census
.
append
(
float
(
stats
[
7
].
split
(
" ("
)[
0
]))
nr_known
.
append
(
float
(
stats
[
8
].
split
(
" ("
)[
0
]))
nr_genes
.
append
(
float
(
stats
[
7
]))
nr_census
.
append
(
float
(
stats
[
6
].
split
(
" ("
)[
0
]))
nr_known
.
append
(
float
(
stats
[
5
].
split
(
" ("
)[
0
]))
plot_data
=
[
nr_regions
,
avg_size
,
total_size
,
nr_genes
,
nr_census
,
nr_known
]
plot_y_axis
=
([
'Number of recurrent regions'
,
'Average size of regions (Kb)'
,
'Total size (Mb)'
,
'Number of genes'
,
'Nr. regions with census genes'
,
'Nr. regions with known genes'
])
...
...
wrappers/GISTIC2/wrapper.py
View file @
4592528e
...
...
@@ -9,7 +9,7 @@ from snakemake.shell import shell
#Convert file locations to absolute paths
segments
=
os
.
path
.
abspath
(
snakemake
.
input
.
seg
)
gistic_dir
=
os
.
path
.
abspath
(
snakemake
.
input
.
gistic_directory
)
outfolder
=
os
.
path
.
abspath
(
snakemake
.
output
[
0
])
outfolder
=
os
.
path
.
abspath
(
snakemake
.
output
[
0
])
.
split
(
"all_lesions"
)[
0
]
#Select reference file
ref
=
snakemake
.
params
.
get
(
"ref"
,
""
)
...
...
@@ -24,10 +24,11 @@ else:
#Check if cnv file is empty
cnv_file
=
snakemake
.
params
.
get
(
"cnv"
,
""
)
if
cnv_file
!=
""
:
cnv_arg
=
" --cnv "
+
cnv_file
if
os
.
stat
(
cnv_file
).
st_size
!=
0
else
""
cnv_arg
=
" --cnv "
+
cnv_file
if
os
.
stat
(
cnv_file
).
st_size
!=
0
else
"
"
else
:
cnv_arg
=
""
cnv_arg
=
"
"
print
(
cnv_arg
)
#Additional arguments
extra
=
snakemake
.
params
.
get
(
"extra"
,
""
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment