Commit b6e501b7 authored by Beatrice Tan

Made names of rules more understandable.

parent 80b3c845
from Circos import InputCircos, bed_to_circos
rule circos_input:
rule make_CIRCOS_input:
"""Make input files for making a circos diagram."""
input:
seg="Input/Segments_tumor.txt",
......@@ -14,7 +14,7 @@ rule circos_input:
run:
InputCircos(input.seg, input.gistic, input.rubic_gains, input.rubic_losses, output.seg, output.gistic, output.rubic)
rule make_circos:
rule make_CIRCOS_plot:
"""Make circos diagram of recurrent regions in RUBIC and GISTIC2.0"""
input:
seg="Reports/Circos/Segments.txt",
......@@ -23,13 +23,13 @@ rule make_circos:
output:
"Reports/Circos/RecurrentRegions.png"
params:
workflow.basedir + "/scripts/circos/circos.conf"
conf=workflow.basedir + "/scripts/circos/circos.conf"
conda:
workflow.basedir + "/envs/circos.yaml"
shell:
"circos -conf {params[0]} -outputfile {output[0]} -param cnv_file={input.seg} -param gistic_file={input.gistic} -param rubic_file={input.rubic}"
"circos -conf {params.conf} -outputfile {output[0]} -param cnv_file={input.seg} -param gistic_file={input.gistic} -param rubic_file={input.rubic}"
rule make_legend_circos:
rule make_legend_CIRCOS:
output:
"Reports/Circos/legend.png"
run:
......@@ -45,7 +45,7 @@ rule make_legend_circos:
bbox = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
fig.savefig(output[0], dpi=400, bbox_inches=bbox)
rule add_legend_circos:
rule add_legend_CIRCOS:
input:
"Reports/Circos/RecurrentRegions.png",
"Reports/Circos/legend.png"
......@@ -61,7 +61,7 @@ rule add_legend_circos:
circos.paste(legend, offset)
circos.save(output[0])
rule circos_input_zoom:
rule circos_input_zoom: #necessary?
"""Make input files for making a circos diagram."""
input:
bed="Reports/Overlap_known_genes.bed"
......@@ -72,17 +72,22 @@ rule circos_input_zoom:
run:
bed_to_circos(input.bed, output.rubic, output.gistic, output.genes)
rule make_circos_zoom:
def get_list_genes(gene_file):
    """Return the non-empty lines of ``gene_file`` as a list of strings.

    The previous body only printed every raw line and returned ``None``,
    so the result could not be passed on (for example as the values of a
    wildcard in ``expand()``). Each returned entry has its surrounding
    whitespace, including the line terminator, removed.

    Args:
        gene_file: Path of the text file to read.

    Returns:
        A list with one stripped string per non-blank line, in file order.
        An empty file yields an empty list.

    NOTE(review): presumably every line holds exactly one gene entry;
    confirm against whatever writes this file before relying on it.
    """
    with open(gene_file, 'r') as genes:
        stripped_lines = (line.strip() for line in genes)
        # Build the list while the file is still open; blank lines are
        # dropped because they cannot name a gene.
        return [entry for entry in stripped_lines if entry]
rule make_zoomed_CIRCOS_plot:
"""Compare locations of known genes, recurrent regions from RUBIC and recurrent regions from GISTIC2."""
input:
gistic="Reports/Circos/Zoom/GISTIC.txt",
rubic="Reports/Circos/Zoom/RUBIC.txt",
genes="Reports/Circos/Zoom/Genes.txt",
genes="Reports/Circos/Zoom/Genes.txt"
output:
plots="Reports/Overlap_plots/12.png"
plots=expand("Reports/Overlap_plots/{gene}.png", gene="NR6") #get_list_genes(input.genes))
params:
workflow.basedir + "/scripts/circos/circos_zoom.conf",
chrom='hs12'
chrom='hs12:67-71'
conda:
workflow.basedir + "/envs/circos.yaml"
shell:
......
......@@ -31,7 +31,7 @@ rule report_tools:
output.genes_both, output.genes_gistic, output.genes_rubic,
input.overlap, output.bed_known)
rule bed_intersect:
rule get_overlap_GISTIC_RUBIC:
"""Intersect the recurrent regions detected by RUBIC and GISTIC2.0."""
input:
gistic="GISTIC/regions_track.conf_" + config["gistic_precision"] + ".bed",
......@@ -43,7 +43,7 @@ rule bed_intersect:
shell:
"bedtools intersect -a {input.gistic} -b {input.rubic} -wo > {output}"
rule bed_known_genes:
rule get_overlap_known_genes:
"""Intersect known genes and recurrent regions detected by RUBIC and GISTIC2.0."""
input:
known="Reports/Locations_known_genes.bed",
......
# Debugging helper: prints both of its arguments; no return statement is
# visible, so it appears to return None.
# NOTE(review): indentation was lost in this view of the file.
def get_settings(nr_settings, all_settings):
print(nr_settings)
print(all_settings)
# Prints the list of indices 0 .. len(config["comparison_settings"]) - 1.
# NOTE(review): cannot tell from here whether this statement belongs to
# get_settings or runs at module level when the Snakefile is parsed - confirm.
print(list(range(len(config["comparison_settings"]))))
# NOTE(review): several directives below appear twice in slightly different
# forms (`seg`, the output paths, `extra`). This looks like removed and added
# diff lines shown together - confirm which set is current before editing.
rule gistic_settings:
"""Run GISTIC2 based on different settings."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
seg="Input/Segments_tumor.txt"
seg="Input/Segments_tumor.txt",
# Unnamed input callable that indexes config["comparison_settings"] with the
# `setting` wildcard.
# NOTE(review): it is a positional entry placed after keyword entries, and no
# `{setting}` placeholder is visible in the output paths below - confirm.
lambda wildcards: config["comparison_settings"][wildcards.setting]
output:
# These two entries expand over every index of config["comparison_settings"].
expand("Settings/GISTIC_{setting_nr}/all_lesions.conf_" + config["gistic_precision"] + ".txt", setting_nr=range(len(config["comparison_settings"])),
expand("Settings/GISTIC_{setting_nr}/regions_track.conf_" + config["gistic_precision"] + ".bed", setting_nr=range(len(config["comparison_settings"]))
# NOTE(review): "wildcards.setting" is literal text inside these two paths,
# not a `{setting}` wildcard placeholder - confirm this is intended.
"Settings/GISTIC_wildcards.setting/all_lesions.conf_" + config["gistic_precision"] + ".txt",
"Settings/GISTIC_wildcards.setting/regions_track.conf_" + config["gistic_precision"] + ".bed"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["comparison_settings"][{setting_nr}]
# NOTE(review): this passes the literal string "wildcards.setting" as `extra`,
# not the value of a wildcard - confirm.
extra="wildcards.setting",
confidence=config["gistic_precision"]
wrapper:
# Local wrapper, resolved relative to the workflow base directory.
"file:" + workflow.basedir + "/wrappers/GISTIC2"
......
import datetime
import os.path
rule install_gistic:
rule install_GISTIC:
"""Install GISTIC2 to a directory of choice."""
output:
os.path.join(config["gisticdir"], "gistic2")
......@@ -10,7 +10,7 @@ rule install_gistic:
shell:
"{workflow.basedir}/scripts/install_gistic2.sh {params}"
rule run_gistic:
rule run_GISTIC:
"""Run GISTIC2 for the tumor segmentation data."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
......@@ -22,7 +22,8 @@ rule run_gistic:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
extra=config["settings_gistic"],
confidence=config["gistic_precision"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
......
rule go_analysis:
rule do_GO_analysis:
"""Perform a gene ontology enrichment analysis using topGO."""
input:
gene_list="Reports/Genes_{tool}.txt"
......@@ -13,7 +13,7 @@ rule go_analysis:
wrapper:
"file:" + workflow.basedir + "/wrappers/topgo"
rule compare_enriched_terms:
rule compare_enriched_GOs:
"""Compare the top 50 GO terms detected by RUBIC and GISTIC2.0"""
input:
go=expand("GO/{tool}.txt", tool=["GISTIC2", "RUBIC"])
......
import ReportSegments
rule firehose:
rule download_firehose_data:
"""Download copy number profiles for a cancer type of choice from firehose."""
output:
"Input/Segments_all.txt"
......@@ -22,11 +22,11 @@ def pipeline_input(wildcards):
if (provided_input != ""): #use provided input file
return provided_input
elif (provided_input == "") and (config["cancer_type"] != "") and (config["date_data"] != ""): #use firehose data
return rules.firehose.output
return rules.download_firehose_data.output
else:
raise ValueError("Provide either a segmentation file as input or get firehose data based on the cancer type and date of choice.")
rule define_input:
rule define_input_pipeline:
"""Create a tumor and normal segmentation file based on the provided input."""
input:
pipeline_input
......@@ -46,7 +46,7 @@ rule define_input:
else: #split firehose data in tumor and normal files.
split_normal_tumor(input[0], output.tumor, output.normal)
rule report_seg:
rule report_segmentation_file:
"""Report information on the input segmentation file."""
input:
seg="Input/Segments_tumor.txt"
......
from Rubic import MarkerFile, BedFile
rule markers_rubic:
rule make_marker_file_RUBIC:
"""Make marker file to use as input for RUBIC based on segmentation file (start, center and end positions of each segment)."""
input:
"Input/Segments_tumor.txt"
......@@ -9,7 +9,7 @@ rule markers_rubic:
run:
MarkerFile(input[0], output[0])
rule run_rubic:
rule run_RUBIC:
"""Run RUBIC for the tumor segmentation data."""
input:
seg="Input/Segments_tumor.txt",
......@@ -26,7 +26,7 @@ rule run_rubic:
wrapper:
"file:" + workflow.basedir +"/wrappers/rubic"
rule make_bed_rubic:
rule make_bed_file_RUBIC:
input:
gains="RUBIC/gains.txt",
losses="RUBIC/losses.txt"
......
......@@ -2,8 +2,9 @@ import ReportSizes
from SampleSizes import SegFile
import os.path
import datetime
from AUC import ROC_curve
rule seg_subsets:
rule get_segmentation_files_subsets:
"""Create segmentation files with different numbers of samples (randomly chosen) for a number of times."""
input:
"Input/Segments_tumor.txt"
......@@ -12,7 +13,7 @@ rule seg_subsets:
run:
SegFile(input[0], output)
rule run_gistic_subsets:
rule run_GISTIC_subsets:
"""Run GISTIC2 for the segmentation files with different subsets."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
......@@ -24,11 +25,12 @@ rule run_gistic_subsets:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
extra=config["settings_gistic"],
confidence=config["gistic_precision"]
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
rule run_rubic_subsets:
rule run_RUBIC_subsets:
"""Run RUBIC for the segmentation files with different subsets."""
input:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt",
......@@ -48,8 +50,8 @@ rule run_rubic_subsets:
#"68ec08a/bio/rubic"
"file:" + workflow.basedir +"/wrappers/rubic"
rule report_sizes:
"""Report the difference when using different sample sizes."""
rule report_size_differences:
"""Report the differences when using different sample sizes."""
input:
gistic=expand("Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/all_lesions.conf_" + config["gistic_precision"] + ".txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic_gains=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
......@@ -65,3 +67,55 @@ rule report_sizes:
ref=config["reference"]
run:
ReportSizes.make_report(input.gistic, input.rubic_gains, input.rubic_losses, params.census, params.known, params.reps, params.sizes, params.ref, output.report, output.plots)
rule make_bed_RUBIC_subsets:
    """Build one bed file from the RUBIC gains and losses of a single sample subset."""
    input:
        gains="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt",
        losses="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/losses.txt",
    output:
        bed="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/regions_track.bed",
    run:
        # BedFile is not defined in this rule; it has to be in scope already.
        BedFile(
            input.gains,
            input.losses,
            output.bed,
        )
rule compare_subset_truth_GISTIC:
    """Run bedtools intersect between the GISTIC bed file of a subset and the bed file of all samples, in both directions."""
    input:
        bed_subsets="Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/regions_track.conf_" + config["gistic_precision"] + ".bed",
        bed_truth="GISTIC/regions_track.conf_" + config["gistic_precision"] + ".bed",
    output:
        subset="Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
        truth="Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed",
    conda:
        workflow.basedir + "/envs/bedtools.yaml"
    # Two chained commands: subset (-a) against all samples (-b) first, then
    # the reverse. The second only runs when the first succeeds (&&).
    shell:
        "bedtools intersect -a {input.bed_subsets} -b {input.bed_truth} -wao > {output.subset} && "
        "bedtools intersect -a {input.bed_truth} -b {input.bed_subsets} -wao > {output.truth}"
rule compare_subset_truth_RUBIC:
    """Run bedtools intersect between the RUBIC bed file of a subset and the bed file of all samples, in both directions."""
    input:
        bed_subsets="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/regions_track.bed",
        bed_truth="RUBIC/regions_track.bed",
    output:
        subset="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
        truth="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed",
    conda:
        workflow.basedir + "/envs/bedtools.yaml"
    # Two chained commands: subset (-a) against all samples (-b) first, then
    # the reverse. The second only runs when the first succeeds (&&).
    shell:
        "bedtools intersect -a {input.bed_subsets} -b {input.bed_truth} -wao > {output.subset} && "
        "bedtools intersect -a {input.bed_truth} -b {input.bed_subsets} -wao > {output.truth}"
rule make_ROC_plot:
    """Draw the ROC plot of precision and recall for the GISTIC and RUBIC subset runs."""
    input:
        # One overlap file per (size, repeat) combination, for each tool and
        # each intersect direction.
        gistic_subset=expand(
            "Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"],
        ),
        gistic_truth=expand(
            "Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"],
        ),
        rubic_subset=expand(
            "Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"],
        ),
        rubic_truth=expand(
            "Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"],
        ),
    output:
        AUC="Samplesize/Precision_recall.png",
    params:
        sizes=config["sizes"],
    run:
        ROC_curve(
            input.gistic_subset,
            input.gistic_truth,
            input.rubic_subset,
            input.rubic_truth,
            output.AUC,
            params.sizes,
        )
import ReportControl
# NOTE(review): two rule headers and several duplicated directives (`extra`,
# the wrapper path, the output entries) appear below. This looks like removed
# and added diff lines shown together - confirm which set is current.
rule run_gistic_control:
rule run_GISTIC_control:
"""Run GISTIC2 for the tumor segmentation data with data from control samples included."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
seg="Input/Segments_tumor.txt"
output:
# NOTE(review): "Control/" has no trailing comma; if it really sits next to the
# following string in the file, the two literals would concatenate - confirm.
"Control/"
"Control/all_lesions.conf_" + config["gistic_precision"] + ".txt",
"Control/regions_track.conf_" + config["gistic_precision"] + ".bed"
params:
# Normal-sample segmentation file handed to the wrapper as `cnv`.
cnv="Input/Segments_normal.txt",
gistic_directory=config["gisticdir"],
ref=config["reference"],
ref_file="",
extra=""
extra="",
confidence=config["gistic_precision"]
wrapper:
# Both lines point at the same local GISTIC2 wrapper directory; they differ
# only in how the path is joined.
"file:" + os.path.join(workflow.basedir, "wrappers/GISTIC2")
"file:" + workflow.basedir + "/wrappers/GISTIC2"
rule report_control:
"""Report the differences between using a control and without using a control."""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment