Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Beatrice Tan
CNAprioritization
Commits
b6e501b7
Commit
b6e501b7
authored
Feb 07, 2018
by
Beatrice Tan
Browse files
Made names of rules more understandable.
parent
80b3c845
Changes
9
Hide whitespace changes
Inline
Side-by-side
rules/Circos.smk
View file @
b6e501b7
from Circos import InputCircos, bed_to_circos
rule
circos
_input:
rule
make_CIRCOS
_input:
"""Make input files for making a circos diagram."""
input:
seg="Input/Segments_tumor.txt",
...
...
@@ -14,7 +14,7 @@ rule circos_input:
run:
InputCircos(input.seg, input.gistic, input.rubic_gains, input.rubic_losses, output.seg, output.gistic, output.rubic)
rule make_
circos
:
rule make_
CIRCOS_plot
:
"""Make circos diagram of recurrent regions in RUBIC and GISTIC2.0"""
input:
seg="Reports/Circos/Segments.txt",
...
...
@@ -23,13 +23,13 @@ rule make_circos:
output:
"Reports/Circos/RecurrentRegions.png"
params:
workflow.basedir + "/scripts/circos/circos.conf"
conf=
workflow.basedir + "/scripts/circos/circos.conf"
conda:
workflow.basedir + "/envs/circos.yaml"
shell:
"circos -conf {params
[0]
} -outputfile {output[0]} -param cnv_file={input.seg} -param gistic_file={input.gistic} -param rubic_file={input.rubic}"
"circos -conf {params
.conf
} -outputfile {output[0]} -param cnv_file={input.seg} -param gistic_file={input.gistic} -param rubic_file={input.rubic}"
rule make_legend_
circos
:
rule make_legend_
CIRCOS
:
output:
"Reports/Circos/legend.png"
run:
...
...
@@ -45,7 +45,7 @@ rule make_legend_circos:
bbox = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
fig.savefig(output[0], dpi=400, bbox_inches=bbox)
rule add_legend_
circos
:
rule add_legend_
CIRCOS
:
input:
"Reports/Circos/RecurrentRegions.png",
"Reports/Circos/legend.png"
...
...
@@ -61,7 +61,7 @@ rule add_legend_circos:
circos.paste(legend, offset)
circos.save(output[0])
rule circos_input_zoom:
rule circos_input_zoom:
#necessary?
"""Make input files for making a circos diagram."""
input:
bed="Reports/Overlap_known_genes.bed"
...
...
@@ -72,17 +72,22 @@ rule circos_input_zoom:
run:
bed_to_circos(input.bed, output.rubic, output.gistic, output.genes)
rule make_circos_zoom:
def get_list_genes(gene_file):
    """Print every line of ``gene_file`` to standard output.

    Each line is printed exactly as read, so it still carries its own line
    ending and ``print`` appends a second newline after it. The function
    returns nothing.

    NOTE(review): despite the name, no list is built or returned -- confirm
    whether callers are expected to receive the gene names.
    """
    with open(gene_file, 'r') as handle:
        for entry in handle:
            print(entry)
rule make_zoomed_CIRCOS_plot:
"""Compare locations of known genes, recurrent regions from RUBIC and recurrent regions from GISTIC2."""
input:
gistic="Reports/Circos/Zoom/GISTIC.txt",
rubic="Reports/Circos/Zoom/RUBIC.txt",
genes="Reports/Circos/Zoom/Genes.txt"
,
genes="Reports/Circos/Zoom/Genes.txt"
output:
plots="Reports/Overlap_plots/
12.png"
plots=
expand(
"Reports/Overlap_plots/
{gene}.png", gene="NR6") #get_list_genes(input.genes))
params:
workflow.basedir + "/scripts/circos/circos_zoom.conf",
chrom='hs12'
chrom='hs12
:67-71
'
conda:
workflow.basedir + "/envs/circos.yaml"
shell:
...
...
rules/ComparisonRegions.smk
View file @
b6e501b7
...
...
@@ -31,7 +31,7 @@ rule report_tools:
output.genes_both, output.genes_gistic, output.genes_rubic,
input.overlap, output.bed_known)
rule
bed_intersect
:
rule
get_overlap_GISTIC_RUBIC
:
"""Intersect the recurrent regions detected by RUBIC and GISTIC2.0."""
input:
gistic="GISTIC/regions_track.conf_" + config["gistic_precision"] + ".bed",
...
...
@@ -43,7 +43,7 @@ rule bed_intersect:
shell:
"bedtools intersect -a {input.gistic} -b {input.rubic} -wo > {output}"
rule
bed
_known_genes:
rule
get_overlap
_known_genes:
"""Intersect known genes and recurrent regions detected by RUBIC and GISTIC2.0."""
input:
known="Reports/Locations_known_genes.bed",
...
...
rules/ComparisonSettings.smk
View file @
b6e501b7
def get_settings(nr_settings, all_settings):
    """Print both arguments and the index list of the configured settings.

    Output, in order: ``nr_settings``, ``all_settings``, then the list
    ``[0, 1, ..., n-1]`` where ``n`` is the length of
    ``config["comparison_settings"]``. Nothing is returned.
    """
    for value in (nr_settings, all_settings):
        print(value)
    # ``config`` is the workflow-level configuration, read here as a global.
    setting_count = len(config["comparison_settings"])
    print(list(range(setting_count)))
rule gistic_settings:
"""Run GISTIC2 based on different settings."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
seg="Input/Segments_tumor.txt"
seg="Input/Segments_tumor.txt",
lambda wildcards: config["comparison_settings"][wildcards.setting]
output:
expand(
"Settings/GISTIC_
{
setting
_nr}
/all_lesions.conf_" + config["gistic_precision"] + ".txt",
setting_nr=range(len(config["comparison_settings"])),
expand(
"Settings/GISTIC_
{
setting
_nr}
/regions_track.conf_" + config["gistic_precision"] + ".bed"
, setting_nr=range(len(config["comparison_settings"]))
"Settings/GISTIC_
wildcards.
setting/all_lesions.conf_" + config["gistic_precision"] + ".txt",
"Settings/GISTIC_
wildcards.
setting/regions_track.conf_" + config["gistic_precision"] + ".bed"
params:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["comparison_settings"][{setting_nr}]
extra="wildcards.setting",
confidence=config["gistic_precision"]
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
...
...
rules/GISTIC2.smk
View file @
b6e501b7
import datetime
import os.path
rule install_
gistic
:
rule install_
GISTIC
:
"""Install GISTIC2 to a directory of choice."""
output:
os.path.join(config["gisticdir"], "gistic2")
...
...
@@ -10,7 +10,7 @@ rule install_gistic:
shell:
"{workflow.basedir}/scripts/install_gistic2.sh {params}"
rule run_
gistic
:
rule run_
GISTIC
:
"""Run GISTIC2 for the tumor segmentation data."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
...
...
@@ -22,7 +22,8 @@ rule run_gistic:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
extra=config["settings_gistic"],
confidence=config["gistic_precision"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
...
...
rules/GenePrioritization.smk
View file @
b6e501b7
rule
go
_analysis:
rule
do_GO
_analysis:
"""Perform a gene ontology enrichment analysis using topGO."""
input:
gene_list="Reports/Genes_{tool}.txt"
...
...
@@ -13,7 +13,7 @@ rule go_analysis:
wrapper:
"file:" + workflow.basedir + "/wrappers/topgo"
rule compare_enriched_
term
s:
rule compare_enriched_
GO
s:
"""Compare the top 50 GO terms detected by RUBIC and GISTIC2.0"""
input:
go=expand("GO/{tool}.txt", tool=["GISTIC2", "RUBIC"])
...
...
rules/PreprocessInput.smk
View file @
b6e501b7
import ReportSegments
rule firehose:
rule
download_
firehose
_data
:
"""Download copy number profiles for a cancer type of choice from firehose."""
output:
"Input/Segments_all.txt"
...
...
@@ -22,11 +22,11 @@ def pipeline_input(wildcards):
if (provided_input != ""): #use provided input file
return provided_input
elif (provided_input == "") and (config["cancer_type"] != "") and (config["date_data"] != ""): #use firehose data
return rules.firehose.output
return rules.
download_
firehose
_data
.output
else:
raise ValueError("Provide either a segmentation file as input or get firehose data based on the cancer type and date of choice.")
rule define_input:
rule define_input
_pipeline
:
"""Create a tumor and normal segmentation file based on the provided input."""
input:
pipeline_input
...
...
@@ -46,7 +46,7 @@ rule define_input:
else: #split firehose data in tumor and normal files.
split_normal_tumor(input[0], output.tumor, output.normal)
rule report_seg:
rule report_seg
mentation_file
:
"""Report information on the input segmentation file."""
input:
seg="Input/Segments_tumor.txt"
...
...
rules/Rubic.smk
View file @
b6e501b7
from Rubic import MarkerFile, BedFile
rule ma
rkers_rubic
:
rule ma
ke_marker_file_RUBIC
:
"""Make marker file to use as input for RUBIC based on segmentation file (start, center and end positions of each segment)."""
input:
"Input/Segments_tumor.txt"
...
...
@@ -9,7 +9,7 @@ rule markers_rubic:
run:
MarkerFile(input[0], output[0])
rule run_
rubic
:
rule run_
RUBIC
:
"""Run RUBIC for the tumor segmentation data."""
input:
seg="Input/Segments_tumor.txt",
...
...
@@ -26,7 +26,7 @@ rule run_rubic:
wrapper:
"file:" + workflow.basedir +"/wrappers/rubic"
rule make_bed_
rubic
:
rule make_bed_
file_RUBIC
:
input:
gains="RUBIC/gains.txt",
losses="RUBIC/losses.txt"
...
...
rules/SampleSizes.smk
View file @
b6e501b7
...
...
@@ -2,8 +2,9 @@ import ReportSizes
from SampleSizes import SegFile
import os.path
import datetime
from AUC import ROC_curve
rule
seg
_subsets:
rule
get_segmentation_files
_subsets:
"""Create segmentation files with different numbers of samples (randomly chosen) for a number of times."""
input:
"Input/Segments_tumor.txt"
...
...
@@ -12,7 +13,7 @@ rule seg_subsets:
run:
SegFile(input[0], output)
rule run_
gistic
_subsets:
rule run_
GISTIC
_subsets:
"""Run GISTIC2 for the segmentation files with different subsets."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
...
...
@@ -24,11 +25,12 @@ rule run_gistic_subsets:
cnv="",
ref=config["reference"],
ref_file="",
extra=config["settings_gistic"]
extra=config["settings_gistic"],
confidence=config["gistic_precision"]
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
rule run_
rubic
_subsets:
rule run_
RUBIC
_subsets:
"""Run RUBIC for the segmentation files with different subsets."""
input:
seg="Samplesize/Input/Size{rand_nr}.Rep{rep_nr}.txt",
...
...
@@ -48,8 +50,8 @@ rule run_rubic_subsets:
#"68ec08a/bio/rubic"
"file:" + workflow.basedir +"/wrappers/rubic"
rule report_sizes:
"""Report the difference when using different sample sizes."""
rule report_size
_difference
s:
"""Report the difference
s
when using different sample sizes."""
input:
gistic=expand("Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/all_lesions.conf_" + config["gistic_precision"] + ".txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
rubic_gains=expand("Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt", rand_nr=config["sizes"], rep_nr=config["repeats"]),
...
...
@@ -65,3 +67,55 @@ rule report_sizes:
ref=config["reference"]
run:
ReportSizes.make_report(input.gistic, input.rubic_gains, input.rubic_losses, params.census, params.known, params.reps, params.sizes, params.ref, output.report, output.plots)
rule make_bed_RUBIC_subsets:
    """Write a bed file of the regions RUBIC detected on one sample subset.

    The wildcards ``rand_nr`` and ``rep_nr`` select the subset directory
    ``Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}``; the gains and losses
    files found there are combined into ``regions_track.bed`` in the same
    directory.
    """
    input:
        gains="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/gains.txt",
        losses="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/losses.txt"
    output:
        bed="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/regions_track.bed"
    run:
        # NOTE(review): BedFile is not imported in the visible part of this
        # file -- presumably it is the one imported from the Rubic module in
        # another included rule file; confirm the include order.
        BedFile(
            input.gains,
            input.losses,
            output.bed,
        )
rule compare_subset_truth_GISTIC:
    """Intersect the GISTIC regions of one sample subset with those of all samples.

    bedtools intersect is run twice with ``-wao``: once with the subset bed
    file as ``-a`` (written to ``output.subset``) and once with the
    all-samples bed file as ``-a`` (written to ``output.truth``).
    """
    input:
        bed_subsets="Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/regions_track.conf_" + config["gistic_precision"] + ".bed",
        bed_truth="GISTIC/regions_track.conf_" + config["gistic_precision"] + ".bed"
    output:
        subset="Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
        truth="Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed"
    conda:
        workflow.basedir + "/envs/bedtools.yaml"
    shell:
        # Adjacent literals are joined into a single command; the second
        # intersect only runs when the first one succeeds (&&).
        "bedtools intersect -a {input.bed_subsets} -b {input.bed_truth} -wao > {output.subset} && "
        "bedtools intersect -a {input.bed_truth} -b {input.bed_subsets} -wao > {output.truth}"
rule compare_subset_truth_RUBIC:
    """Intersect the RUBIC regions of one sample subset with those of all samples.

    bedtools intersect is run twice with ``-wao``: once with the subset bed
    file as ``-a`` (written to ``output.subset``) and once with the
    all-samples bed file as ``-a`` (written to ``output.truth``).
    """
    input:
        bed_subsets="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/regions_track.bed",
        bed_truth="RUBIC/regions_track.bed"
    output:
        subset="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
        truth="Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed"
    conda:
        workflow.basedir + "/envs/bedtools.yaml"
    shell:
        # Adjacent literals are joined into a single command; the second
        # intersect only runs when the first one succeeds (&&).
        "bedtools intersect -a {input.bed_subsets} -b {input.bed_truth} -wao > {output.subset} && "
        "bedtools intersect -a {input.bed_truth} -b {input.bed_subsets} -wao > {output.truth}"
rule make_ROC_plot:
    """Plot precision and recall of GISTIC and RUBIC across the sample subsets.

    Collects, for every combination of ``config["sizes"]`` and
    ``config["repeats"]``, the subset-vs-truth and truth-vs-subset overlap
    bed files of both tools and hands them to ``ROC_curve``, which is
    expected to write ``Samplesize/Precision_recall.png``.
    """
    input:
        gistic_subset=expand(
            "Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"]
        ),
        gistic_truth=expand(
            "Samplesize/GISTIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"]
        ),
        rubic_subset=expand(
            "Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_subset_truth.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"]
        ),
        rubic_truth=expand(
            "Samplesize/RUBIC/Size{rand_nr}.Rep{rep_nr}/Overlap_truth_subset.bed",
            rand_nr=config["sizes"],
            rep_nr=config["repeats"]
        )
    output:
        AUC="Samplesize/Precision_recall.png"
    params:
        sizes=config["sizes"]
    run:
        # Argument order: GISTIC subset/truth overlaps, RUBIC subset/truth
        # overlaps, output image path, then the list of subset sizes.
        ROC_curve(
            input.gistic_subset,
            input.gistic_truth,
            input.rubic_subset,
            input.rubic_truth,
            output.AUC,
            params.sizes,
        )
rules/UseControl.smk
View file @
b6e501b7
import ReportControl
rule run_
gistic
_control:
rule run_
GISTIC
_control:
"""Run GISTIC2 for the tumor segmentation data with data from control samples included."""
input:
gistic_directory=os.path.join(config["gisticdir"], "gistic2"),
seg="Input/Segments_tumor.txt"
output:
"Control/"
"Control/all_lesions.conf_" + config["gistic_precision"] + ".txt",
"Control/regions_track.conf_" + config["gistic_precision"] + ".bed"
params:
cnv="Input/Segments_normal.txt",
gistic_directory=config["gisticdir"],
ref=config["reference"],
ref_file="",
extra=""
extra="",
confidence=config["gistic_precision"]
wrapper:
"file:" +
os.path.join(
workflow.basedir
,
"wrappers/GISTIC2"
)
"file:" + workflow.basedir
+
"
/
wrappers/GISTIC2"
rule report_control:
"""Report the differences between using a control and without using a control."""
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment