Commit 4592528e authored by Beatrice Tan's avatar Beatrice Tan

Updated report on sample sizes.

parent 7c36cd45
......@@ -37,11 +37,11 @@ onerror:
rule all:
"""Define desired output from pipeline."""
input:
"Samplesize/Report.txt",
#"Reports/Circos/RecurrentRegions.png"
#"GO/comparison.txt",
#"Input/Segments_tumor.txt",
#"Reports/Control.txt",
"Reports/Tools.txt",
#"Reports/Tools.txt",
#"Reports/Results.html",
#"Reports/Control.txt",
......
......@@ -16,7 +16,17 @@ prev_found_genes: input_files/intogen-CM-drivers-data.tsv
census_genes: input_files/Census_genes.txt
biomart_genes: input_files/biomart_human_genes.tsv
precision: '99'
#Settings GISTIC2.0
gistic_precision: "99"
settings_gistic: "-ta 0.1
-td 0.1
-qvt 0.25
-brlen 0.7
-cap 1.5
-rx 1
-genegistic 1"
#Settings for sample size differences
sizes: [20, 30, 40, 50, 60, 70, 80, 90]
......
......@@ -17,12 +17,12 @@ rule run_gistic:
gistic_directory=config["gisticdir"],
seg="Input/Segments_tumor.txt"
output:
"GISTIC/all_lesions.conf_" + config["precision"] + ".txt"
"GISTIC/all_lesions.conf_" + config["gistic_precision"] + ".txt"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=""
extra=config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt" #space between date and time!
wrapper:
......@@ -85,7 +85,7 @@ rule make_circos:
rule report_tools:
"""Report the differences in calls between GISTIC2 and RUBIC."""
input:
gistic="GISTIC/all_lesions.conf_" + config["precision"] + ".txt",
gistic="GISTIC/all_lesions.conf_" + config["gistic_precision"] + ".txt",
rubic_gain="RUBIC/gains.txt",
rubic_loss="RUBIC/losses.txt"
output:
......
......@@ -5,55 +5,51 @@ from Rubic import MarkerFile
rule seg_subsets:
"""Create segmentation files with different numbers of samples (randomly chosen) for a number of times."""
input:
config["inputfile"] + "_new"
"Input/Segments_tumor.txt"
output:
expand("Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt", rand_nr=config["sizes"], rep_nr=config["repeats"])
run:
SegFile(input[0], output[0])
rule gistic_subsets:
rule run_gistic_subsets:
"""Run GISTIC2 for the segmentation files with different subsets."""
input:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt",
cnv=""
gistic_directory=config["gisticdir"],
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt"
output:
"Samplesize/GISTIC2/Size{rand_nr}.Rep{rep_nr}/"
"Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/all_lesions.conf_" + config["gistic_precision"] + ".txt"
params:
cnv="",
gistic_directory=config["gisticdir"],
ref=config["reference"],
ref_file="",
extra=""
extra=config["settings_gistic"]
wrapper:
"file:" + os.path.join(workflow.basedir, "wrappers/GISTIC2")
"file:" + workflow.basedir + "/wrappers/GISTIC2"
rule seg_rubic_subsets:
"""Create segmentation files to use as input for RUBIC."""
input:
"Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt"
output:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}_rubic.txt",
marker="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}_marker.txt"
run:
get_seg_rubic(input[0], output.seg, output.marker)
rule rubic_subsets:
rule run_rubic_subsets:
"""Run RUBIC for the segmentation files with different subsets."""
input:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}_rubic.txt",
markers="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}_marker.txt"
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt",
markers="Input/Markers.txt"
output:
"Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/"
out_gains="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt",
out_losses="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/losses.txt",
out_plots="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/plots"
params:
fdr="0.25",
genefile=config["biomart_genes"]
genefile=os.path.join(workflow.basedir, config["biomart_genes"]) if config["biomart_genes"].startswith("input_files") else config["bimart_genes"]
benchmark:
"Benchmarks/RUBIC." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + os.path.join(workflow.basedir, "wrappers/rubic")
"file:" + workflow.basedir +"/wrappers/rubic"
rule report_sizes:
"""Report the difference when using different sample sizes."""
input:
gistic=expand("Samplesize/GISTIC2/Size{rand_nr}.Rep{rep_nr}/", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/", rand_nr=config["sizes"], rep_nr=config["repeats"])
gistic=expand("Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/all_lesions.conf_" + config["gistic_precision"] + ".txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic_gains=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic_losses=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/losses.txt", rand_nr=config["sizes"], rep_nr=config["repeats"])
output:
report="Samplesize/Report.txt",
plots="Samplesize/Plots/"
......
......@@ -293,15 +293,16 @@ class ReportSizes:
with open(report_file, 'w') as out:
dict_stats = OrderedDict()
row_names = (["Size", "Type", "Nr. regions", "Avg. size (Kb)", "Total size (Mb)",
"Nr. genes", "Nr. regions with census genes", "Nr. regions with known genes"])
"Nr. regions with census genes", "Nr. regions with known genes", "Nr. genes"])
tool = GISTIC2
for size_file in size_results:
parsed_gistic = parse().gistic_results(size_file)
parsed_gistic = parsed_results = parse(size_file, known_genes, census_genes, tool)
size, repetition = size_file.split("/")[-1].split("x")
stats_gistic = self.calculate_stats(parsed_gistic, census_genes, known_genes, size)
stats_results = get_stats(parsed_results, size)
if size not in dict_stats.keys():
dict_stats[size] = [stats_gistic]
dict_stats[size] = [stats_results]
else:
dict_stats[size] = dict_stats[size] + [stats_gistic]
dict_stats[size] = dict_stats[size] + [stats_results]
out.write(dict_stats)
self.extract_stats_sizes(dict_stats, reps, plot_dir)
......@@ -318,9 +319,9 @@ class ReportSizes:
if reg_size != 'N/A':
avg_size.append(float(reg_size))
total_size.append(float(stats[4]))
nr_genes.append(float(stats[5]))
nr_census.append(float(stats[7].split(" (")[0]))
nr_known.append(float(stats[8].split(" (")[0]))
nr_genes.append(float(stats[7]))
nr_census.append(float(stats[6].split(" (")[0]))
nr_known.append(float(stats[5].split(" (")[0]))
plot_data = [nr_regions, avg_size, total_size, nr_genes, nr_census, nr_known]
plot_y_axis = (['Number of recurrent regions', 'Average size of regions (Kb)', 'Total size (Mb)',
'Number of genes', 'Nr. regions with census genes', 'Nr. regions with known genes'])
......
......@@ -9,7 +9,7 @@ from snakemake.shell import shell
#Convert file locations to absolute paths
segments = os.path.abspath(snakemake.input.seg)
gistic_dir = os.path.abspath(snakemake.input.gistic_directory)
outfolder = os.path.abspath(snakemake.output[0])
outfolder = os.path.abspath(snakemake.output[0]).split("all_lesions")[0]
#Select reference file
ref = snakemake.params.get("ref", "")
......@@ -24,10 +24,11 @@ else:
#Check if cnv file is empty
cnv_file = snakemake.params.get("cnv", "")
if cnv_file != "":
cnv_arg = " --cnv " + cnv_file if os.stat(cnv_file).st_size != 0 else ""
cnv_arg = " --cnv " + cnv_file if os.stat(cnv_file).st_size != 0 else " "
else:
cnv_arg = ""
cnv_arg = " "
print(cnv_arg)
#Additional arguments
extra = snakemake.params.get("extra", "")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment