Commit 0ba99b31 authored by Beatrice Tan's avatar Beatrice Tan
Browse files

Fixed small mistakes.

parent 7a860865
......@@ -5,6 +5,7 @@ configfile: "config.yaml"
import sys
sys.path.insert(0, workflow.basedir + "/scripts")
#Rules to run pipeline for prioritization of regions and genes.
include: "rules/PreprocessInput.smk"
include: "rules/RecurrentRegions.smk"
......@@ -17,6 +18,7 @@ include: "rules/UseControl.smk"
#Directory to save all files.
workdir: config["workdir"]
#Print information on start, succes and error
onstart:
print("\nExecuting pipeline to identify recurrent copy number alterations and candidate genes based on copy number profiles from tumor NGS data.\n")
......@@ -37,16 +39,13 @@ onerror:
rule all:
"""Define desired output from pipeline."""
input:
#"Samplesize/Report.txt",
#"Reports/Circos/RecurrentRegions.png"
#"GO/comparison.txt",
#"Reports/Control.txt",
"Reports/Tools.txt",
#"Reports/Results.html",
#"Reports/Control.txt",
"Reports/Segments.txt",
"GO/comparison.txt"
rule help:
"""Print list of all targets with help."""
run:
for rule in workflow.rules:
print(rule.name + "\t" + rule.docstring)
print('- ' + rule.name + "\t" + rule.docstring)
......@@ -24,9 +24,8 @@ settings_gistic: "-ta 0.1
-brlen 0.7
-cap 1.5
-rx 1
-genegistic 1"
-genegistic 1
-conf 0.99"
#Settings for sample size differences
sizes: [20, 30, 40, 50, 60, 70, 80, 90]
......
channels:
- conda-forge
- edurand
- bioconda
dependencies:
- matplotlib-venn =0.11.5
- pyensembl =1.1.0
- circos =0.69.6
......@@ -24,7 +24,7 @@ rule run_gistic:
ref_file="",
extra=config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt" #space between date and time!
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
......@@ -58,7 +58,7 @@ rule circos_input:
"""Make input files for making a circos diagram."""
input:
seg="Reports/Segments.txt",
gistic="GISTIC/",
gistic="GISTIC/all_lesions.conf_" + config["gistic_precision"] + ".txt",
rubic_gains="RUBIC/gains.txt",
rubic_losses="RUBIC/losses.txt"
output:
......
......@@ -9,14 +9,16 @@
#$ -m be
#$ -M b.f.tan@lumc.nl
conda-env create -n CNAprioritization -f envs/pipeline.yaml
conda-env update -n CNAprioritization -f envs/pipeline.yaml
source activate CNAprioritization
snakemake -p \
--configfile $@ \
--snakefile #/exports/sasc/dcats/snakemake/RNAseq_multi_method/Snakefile \
--snakefile #/.../bftan/Snakefile \
--latency-wait 90 \
# --drmaa " -N preprocessor -pe BWA {threads} -l h_vmem={resources.mem}G -q all.q -cwd -V" \
# --drmaa-log-dir $(pwd)/.cluster_logs \
--jobs 4 \
--max-jobs-per-second 10 \
--restart-times 3 \
--use-conda \
--conda-prefix #/exports/sasc/dcats/snakemake/.conda
--conda-prefix #/.../bftan/snakemake/.conda
......@@ -3,9 +3,9 @@ import os.path
class InputCircos:
"""Make input files for a Circos plot."""
def __init__(self, seg, gistic_dir, rubic_gain, rubic_loss, cir_seg, cir_gistic, cir_rubic):
def __init__(self, seg, gistic_results, rubic_gain, rubic_loss, cir_seg, cir_gistic, cir_rubic):
self.make_seg_file(seg, cir_seg)
self.make_gistic_file(gistic_dir, cir_gistic)
self.make_gistic_file(gistic_results, cir_gistic)
self.make_rubic_file(rubic_gain, rubic_loss, cir_rubic)
def make_seg_file(self, seg, cir_seg):
......@@ -18,10 +18,9 @@ class InputCircos:
out_line = ["hs" + chrom, start, end, seg_mean]
out.write(" ".join(out_line) + "\n")
def make_gistic_file(self, gistic, cir_gistic):
def make_gistic_file(self, gistic_file, cir_gistic):
""""Make input gistic file."""
with open(cir_gistic, 'w') as out:
gistic_file = os.path.join(gistic, "all_lesions.conf_75.txt")
with open(gistic_file, 'r') as recurrent:
recurrent.readline()
for line in recurrent:
......
......@@ -135,30 +135,14 @@ class ReportTools:
for cnv in tool:
out.write(tool_name + "\t")
cnv_type = "Amplification" if cnv[3] == "amp" else "Deletion"
begin_row = cnv[0:3] + [cnv_type, str(cnv[4])]
q_vals = ", ".join(cnv[4]) if type(cnv[4]) == tuple else cnv[4]
begin_row = cnv[0:3] + [cnv_type, str(q_vals)]
out.write("\t".join(begin_row))
for list_genes in range(5,8):
out.write("\t" + ", ".join(cnv[list_genes]))
out.write("\n")
tool_name = "RUBIC"
# def overlap_most_sign(self, parsed_results):
# """Examine whether most significant regions (highest log10(qvalue)) are also found by other tool."""
# for tool in parsed_results:
# tool_qvals = {}
# for cnv in tool:
# qval = cnv[4]
# if type(qval) == tuple: #rubic dict
# qval = qval[0]
# qval = float(qval)
# tool_qvals[qval] = cnv
# sorted_qvals = sorted(tool_qvals, reverse=True)
# print(sorted_qvals)
# for top in range(0,10):
# top_cnv = tool_qvals[sorted_qvals[top]]
# print(top_cnv)
def make_genefiles(self, parsed_results, out_GISTIC, out_RUBIC):
"""Make files with all genes reported by GISTIC2 and RUBIC."""
genes_all = []
......@@ -238,9 +222,9 @@ class ReportTools:
location_gistic = gistic_cnv[0:3]
if gene not in location_dict:
location_dict[gene] = gene_name, location_gene, location_gistic, location_rubic
else:
print(location_dict[gene])
print(gene_name, location_gene, location_gistic, location_rubic)
#else:
#print(location_dict[gene])
#print(gene_name, location_gene, location_gistic, location_rubic)
tool = "RUBIC"
with open(overlap_file, 'w') as out:
for gene in location_dict.keys():
......@@ -248,24 +232,6 @@ class ReportTools:
out_line = [gene_name, location_gene[0] + ":" + "-".join(location_gene[1:3]), location_gistic[0] + ":" + "-".join(location_gistic[1:3]), location_rubic[0] + ":" + "-".join(location_rubic[1:3])]
out.write("\t".join(out_line) + "\n")
# top_genes = []
# top_genes = parse().gene_file(known_genes, 10)
# #print(top_genes)
# with open(overlap_file, 'w') as out:
# for gene in top_genes:
# out.write(gene + "\n")
#print(gene)
# location_gene = self.get_location_gene(gene_file, gene)
# location_gene = list(map(str, location_gene))
#print(location_gene)
# out.write("Location of gene:\t" + "\t".join(location_gene) + "\n")
# if location_gene != ('N/A', 'N/A', 'N/A'):
# location_gistic = self.get_location_gistic(gistic_results, location_gene)
# out.write("Location CNV GISTIC:\t" + "\t".join(location_gistic) + "\n")
# location_rubic = self.get_location_rubic(rubic_results, gene)
# out.write("Location CNV RUBIC:\t" + "\t".join(location_rubic) + "\n\n")
def get_location_gene(self, biomart_file, gene_of_interest):
"""Get location of a gene from a biomart gene file"""
location = ['N/A', 'N/A', 'N/A']
......@@ -284,7 +250,7 @@ class ReportTools:
# continue
return location, gene_name
def get_location_gistic(self, gistic_results, location_gene):
def get_location_gistic(self, gistic_results, location_gene): #more regions overlap: extract one with longest overlap
"""Get location of the CNV detected by GISTIC2 harboring the gene of interest"""
loc = ['N/A', 'N/A', 'N/A']
chrom_gene, start_gene, end_gene = location_gene
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment