Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Beatrice Tan
CNAprioritization
Commits
0ba99b31
Commit
0ba99b31
authored
Jan 29, 2018
by
Beatrice Tan
Browse files
Fixed small mistakes.
parent
7a860865
Changes
7
Hide whitespace changes
Inline
Side-by-side
Snakefile
View file @
0ba99b31
...
...
@@ -5,6 +5,7 @@ configfile: "config.yaml"
import sys
sys.path.insert(0, workflow.basedir + "/scripts")
#Rules to run pipeline for prioritization of regions and genes.
include: "rules/PreprocessInput.smk"
include: "rules/RecurrentRegions.smk"
...
...
@@ -17,6 +18,7 @@ include: "rules/UseControl.smk"
#Directory to save all files.
workdir: config["workdir"]
#Print information on start, succes and error
onstart:
print("\nExecuting pipeline to identify recurrent copy number alterations and candidate genes based on copy number profiles from tumor NGS data.\n")
...
...
@@ -37,16 +39,13 @@ onerror:
rule all:
"""Define desired output from pipeline."""
input:
#"Samplesize/Report.txt",
#"Reports/Circos/RecurrentRegions.png"
#"GO/comparison.txt",
#"Reports/Control.txt",
"Reports/Tools.txt",
#"Reports/Results.html",
#"Reports/Control.txt",
"Reports/Segments.txt",
"GO/comparison.txt"
rule help:
"""Print list of all targets with help."""
run:
for rule in workflow.rules:
print(rule.name + "\t" + rule.docstring)
print(
'- ' +
rule.name + "\t" + rule.docstring)
config.yaml
View file @
0ba99b31
...
...
@@ -24,9 +24,8 @@ settings_gistic: "-ta 0.1
-brlen
0.7
-cap
1.5
-rx
1
-genegistic
1"
-genegistic
1
-conf
0.99"
#Settings for sample size differences
sizes
:
[
20
,
30
,
40
,
50
,
60
,
70
,
80
,
90
]
...
...
envs/pipeline.yaml
View file @
0ba99b31
channels
:
-
conda-forge
-
edurand
-
bioconda
dependencies
:
-
matplotlib-venn =0.11.5
-
pyensembl =1.1.0
-
circos =0.69.6
rules/RecurrentRegions.smk
View file @
0ba99b31
...
...
@@ -24,7 +24,7 @@ rule run_gistic:
ref_file="",
extra=config["settings_gistic"]
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
#space between date and time!
"Benchmarks/GISTIC2." + str(datetime.datetime.now()).replace(" ", "_") + ".txt"
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
...
...
@@ -58,7 +58,7 @@ rule circos_input:
"""Make input files for making a circos diagram."""
input:
seg="Reports/Segments.txt",
gistic="GISTIC/",
gistic="GISTIC/
all_lesions.conf_" + config["gistic_precision"] + ".txt
",
rubic_gains="RUBIC/gains.txt",
rubic_losses="RUBIC/losses.txt"
output:
...
...
run_
snakemake
.sh
→
run_
on_cluster
.sh
View file @
0ba99b31
...
...
@@ -9,14 +9,16 @@
#$ -m be
#$ -M b.f.tan@lumc.nl
conda-env create
-n
CNAprioritization
-f
envs/pipeline.yaml
conda-env update
-n
CNAprioritization
-f
envs/pipeline.yaml
source
activate CNAprioritization
snakemake
-p
\
--configfile
$@
\
--snakefile
#/
exports/sasc/dcats/snakemake/RNAseq_multi_method
/Snakefile \
--snakefile
#/
.../bftan
/Snakefile \
--latency-wait
90
\
# --drmaa " -N preprocessor -pe BWA {threads} -l h_vmem={resources.mem}G -q all.q -cwd -V" \
# --drmaa-log-dir $(pwd)/.cluster_logs \
--jobs
4
\
--max-jobs-per-second
10
\
--restart-times
3
\
--use-conda
\
--conda-prefix
#/
exports/sasc/dcats
/snakemake/.conda
--conda-prefix
#/
.../bftan
/snakemake/.conda
scripts/Circos.py
View file @
0ba99b31
...
...
@@ -3,9 +3,9 @@ import os.path
class
InputCircos
:
"""Make input files for a Circos plot."""
def
__init__
(
self
,
seg
,
gistic_
dir
,
rubic_gain
,
rubic_loss
,
cir_seg
,
cir_gistic
,
cir_rubic
):
def
__init__
(
self
,
seg
,
gistic_
results
,
rubic_gain
,
rubic_loss
,
cir_seg
,
cir_gistic
,
cir_rubic
):
self
.
make_seg_file
(
seg
,
cir_seg
)
self
.
make_gistic_file
(
gistic_
dir
,
cir_gistic
)
self
.
make_gistic_file
(
gistic_
results
,
cir_gistic
)
self
.
make_rubic_file
(
rubic_gain
,
rubic_loss
,
cir_rubic
)
def
make_seg_file
(
self
,
seg
,
cir_seg
):
...
...
@@ -18,10 +18,9 @@ class InputCircos:
out_line
=
[
"hs"
+
chrom
,
start
,
end
,
seg_mean
]
out
.
write
(
" "
.
join
(
out_line
)
+
"
\n
"
)
def
make_gistic_file
(
self
,
gistic
,
cir_gistic
):
def
make_gistic_file
(
self
,
gistic
_file
,
cir_gistic
):
""""Make input gistic file."""
with
open
(
cir_gistic
,
'w'
)
as
out
:
gistic_file
=
os
.
path
.
join
(
gistic
,
"all_lesions.conf_75.txt"
)
with
open
(
gistic_file
,
'r'
)
as
recurrent
:
recurrent
.
readline
()
for
line
in
recurrent
:
...
...
scripts/Reports.py
View file @
0ba99b31
...
...
@@ -135,30 +135,14 @@ class ReportTools:
for
cnv
in
tool
:
out
.
write
(
tool_name
+
"
\t
"
)
cnv_type
=
"Amplification"
if
cnv
[
3
]
==
"amp"
else
"Deletion"
begin_row
=
cnv
[
0
:
3
]
+
[
cnv_type
,
str
(
cnv
[
4
])]
q_vals
=
", "
.
join
(
cnv
[
4
])
if
type
(
cnv
[
4
])
==
tuple
else
cnv
[
4
]
begin_row
=
cnv
[
0
:
3
]
+
[
cnv_type
,
str
(
q_vals
)]
out
.
write
(
"
\t
"
.
join
(
begin_row
))
for
list_genes
in
range
(
5
,
8
):
out
.
write
(
"
\t
"
+
", "
.
join
(
cnv
[
list_genes
]))
out
.
write
(
"
\n
"
)
tool_name
=
"RUBIC"
# def overlap_most_sign(self, parsed_results):
# """Examine whether most significant regions (highest log10(qvalue)) are also found by other tool."""
# for tool in parsed_results:
# tool_qvals = {}
# for cnv in tool:
# qval = cnv[4]
# if type(qval) == tuple: #rubic dict
# qval = qval[0]
# qval = float(qval)
# tool_qvals[qval] = cnv
# sorted_qvals = sorted(tool_qvals, reverse=True)
# print(sorted_qvals)
# for top in range(0,10):
# top_cnv = tool_qvals[sorted_qvals[top]]
# print(top_cnv)
def
make_genefiles
(
self
,
parsed_results
,
out_GISTIC
,
out_RUBIC
):
"""Make files with all genes reported by GISTIC2 and RUBIC."""
genes_all
=
[]
...
...
@@ -238,9 +222,9 @@ class ReportTools:
location_gistic
=
gistic_cnv
[
0
:
3
]
if
gene
not
in
location_dict
:
location_dict
[
gene
]
=
gene_name
,
location_gene
,
location_gistic
,
location_rubic
else
:
print
(
location_dict
[
gene
])
print
(
gene_name
,
location_gene
,
location_gistic
,
location_rubic
)
#
else:
#
print(location_dict[gene])
#
print(gene_name, location_gene, location_gistic, location_rubic)
tool
=
"RUBIC"
with
open
(
overlap_file
,
'w'
)
as
out
:
for
gene
in
location_dict
.
keys
():
...
...
@@ -248,24 +232,6 @@ class ReportTools:
out_line
=
[
gene_name
,
location_gene
[
0
]
+
":"
+
"-"
.
join
(
location_gene
[
1
:
3
]),
location_gistic
[
0
]
+
":"
+
"-"
.
join
(
location_gistic
[
1
:
3
]),
location_rubic
[
0
]
+
":"
+
"-"
.
join
(
location_rubic
[
1
:
3
])]
out
.
write
(
"
\t
"
.
join
(
out_line
)
+
"
\n
"
)
# top_genes = []
# top_genes = parse().gene_file(known_genes, 10)
# #print(top_genes)
# with open(overlap_file, 'w') as out:
# for gene in top_genes:
# out.write(gene + "\n")
#print(gene)
# location_gene = self.get_location_gene(gene_file, gene)
# location_gene = list(map(str, location_gene))
#print(location_gene)
# out.write("Location of gene:\t" + "\t".join(location_gene) + "\n")
# if location_gene != ('N/A', 'N/A', 'N/A'):
# location_gistic = self.get_location_gistic(gistic_results, location_gene)
# out.write("Location CNV GISTIC:\t" + "\t".join(location_gistic) + "\n")
# location_rubic = self.get_location_rubic(rubic_results, gene)
# out.write("Location CNV RUBIC:\t" + "\t".join(location_rubic) + "\n\n")
def
get_location_gene
(
self
,
biomart_file
,
gene_of_interest
):
"""Get location of a gene from a biomart gene file"""
location
=
[
'N/A'
,
'N/A'
,
'N/A'
]
...
...
@@ -284,7 +250,7 @@ class ReportTools:
# continue
return
location
,
gene_name
def
get_location_gistic
(
self
,
gistic_results
,
location_gene
):
def
get_location_gistic
(
self
,
gistic_results
,
location_gene
):
#more regions overlap: extract one with longest overlap
"""Get location of the CNV detected by GISTIC2 harboring the gene of interest"""
loc
=
[
'N/A'
,
'N/A'
,
'N/A'
]
chrom_gene
,
start_gene
,
end_gene
=
location_gene
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment