Commit 1a6403cd authored by Beatrice Tan's avatar Beatrice Tan

Improved structure of rules and functions.

parent db6d0312
from snakemake.utils import report
#Configure input and settings
configfile: "config.yaml"
......@@ -9,15 +11,15 @@ import os.path
#Rules to run pipeline for prioritization of regions and genes.
include: "rules/PreprocessInput.smk"
include: "rules/GISTIC2.smk"
include: "rules/Rubic.smk"
include: "rules/RUBIC.smk"
include: "rules/GenePrioritization.smk"
#Rules to compare different inputs.
include: "rules/ComparisonRegions.smk"
include: "rules/Circos.smk"
#include: "rules/SampleSizes.smk"
include: "rules/SampleSizes.smk"
#include: "rules/UseControl.smk"
#include: "rules/ComparisonSettings.smk"
include: "rules/ComparisonSettings.smk"
#Directory to save all files.
workdir: config["workdir"]
......@@ -39,11 +41,12 @@ onsuccess:
onerror:
print("\n\nPipeline failed. Possible reasons:\n- Wrong input files\n- Missing arguments in config file\n- Error in conda environment\n\n")
rule all:
"""Define desired output from pipeline."""
input:
"Reports/Results.html"
#"Settings/Report.txt",
"PipelineResults.html",
"ComparisonResults.html"
rule help:
"""Print list of all targets with help."""
......@@ -51,9 +54,8 @@ rule help:
for rule in workflow.rules:
print('- ' + rule.name + "\t" + rule.docstring)
rule report:
"""Write html report on segmentation file."""
rule report_pipeline:
"""Write HTML report on output from pipeline."""
input:
seg="Reports/Segments.txt",
tools="Reports/Tools.txt",
......@@ -62,23 +64,40 @@ rule report:
genes_gistic="Reports/Genes_GISTIC2.txt",
genes_rubic="Reports/Genes_RUBIC.txt",
venn="Reports/Venn_overlap_genes.png",
swarmplot="Reports/Swarmplot_sizes.png",
circos="Reports/Circos/RecurrentRegions.png",
circos_legend="Reports/Circos/RecurrentRegions_legend.png",
known_genes="Reports/Overlap_known_genes.bed"
swarmplot="Reports/Comparison_sizes.png",
circos="Circos/RecurrentRegions_legend.png",
output:
html="PipelineResults.html"
run:
from snakemake.utils import report
report("""
====================================================
Report on the results of the prioritization pipeline
====================================================
- Report on segmentation file: seg_
- Report on comparison between tools and overlapping regions: tools_
- Table with all recurrent regions and overlapping genes: table_regions_
- Circos plot showing the raw segmentation file and recurrent regions detected by both tools: circos_
- Venn diagram showing the overlap between gene lists from both tools: venn_
- Swarmplot showing the differences in sizes between both tools: swarmplot_
""", output.html, metadata="Beatrice F. Tan (beatrice.ftan@gmail.com)", **input)
rule report_comparisons:
"""Write HTML report on comparisons between sample sizes, settings and using a control."""
input:
#size="Samplesizes/Report.txt"
circos_genes=get_list_genes_circos
output:
html="Reports/Results.html"
html="ComparisonResults.html"
run:
from snakemake.utils import report
with open(input.seg, 'r') as seg:
nr_samples = seg.readline().split("\t")[1].strip()
report("""
====================================================
Report on the results of the prioritization pipeline
====================================================
In total, {nr_samples} samples were present in the raw segmentation file.
See: Table T1_
- Report on sample size comparison:
""", output.html, metadata="Beatrice F. Tan (beatrice.ftan@gmail.com)", T1=input[0])
#**input)
""", output.html, metadata="Beatrice F. Tan (beatrice.ftan@gmail.com)", **input)
#Directories to be specified
workdir: /home/bftan/CNA_results #directory to write output
gisticdir: /home/bftan/Tools/GISTIC2 #directory to install GISTIC2
#workdir: /home/bftan/CNA_results #directory to write output
#gisticdir: /home/bftan/Tools/GISTIC2 #directory to install GISTIC2
#workdir: /home/beatrice/CNA_analysis
#gisticdir: /home/beatrice/CNA_analysis/run_gistic2
workdir: /home/beatrice/CNA_99_genegistic
gisticdir: /home/beatrice/CNA_analysis/run_gistic2
#Input details to download from firehose
cancer_type: SKCM
......@@ -14,22 +14,23 @@ inputfile: "" #tumor segmentation data
normal: "" #normal segmentation data
#Data for running and benchmarking tools.
reference: hg19
reference: hg19 #hg38.UCSC.add_miR.160920.refgene
prev_found_genes: input_files/intogen-CM-drivers-data.tsv
census_genes: input_files/Census_genes.txt
biomart_genes: input_files/biomart_human_genes.tsv
biomart_genes: input_files/biomart_human_genes_hg19.tsv #wrong genome build
ID_to_GO: input_files/ID_to_GO.txt
#Settings GISTIC2.0
gistic_precision: "99"
settings_gistic: ""
settings_gistic: "-brlen 0.98 -genegistic 1"
comparison_settings: ["-ta 0.1 -td 0.1 -qvt 0.25 -brlen 0.7 -cap 1.5 -rx 1 -genegistic 1 -conf 0.99",
"-ta 0.1 -td 0.1 -qvt 0.25 -brlen 0.7 -cap 1.5 -rx 1 -genegistic 1 -conf 0.75",
"-ta 0.1 -td 0.1 -qvt 0.25 -brlen 0.98 -cap 1.5 -rx 1 -genegistic 1 -conf 0.75",
"-ta 0.1 -td 0.1 -qvt 0.25 -brlen 0.98 -cap 1.5 -rx 1 -genegistic 0 -conf 0.75",
"-ta 0.1 -td 0.1 -qvt 0.25 -brlen 0.7 -cap 1.5 -rx 1 -genegistic 0 -conf 0.75",]
#GISTIC2.0 settings to compare
comparison_precision: ["99", "90", "75"]
comparison_settings: ["-brlen 0.7 -genegistic 1",
"-brlen 0.98 -genegistic 1",
"-brlen 0.98 -genegistic 0",
"-brlen 0.7 -genegistic 0"]
#Settings for sample size differences
sizes: [20, 30, 40, 50, 60, 70, 80, 90]
sizes: [20, 30, 40, 50, 60, 70, 80, 90, 100, 110]
repeats: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
KIT
PDGFRA
KDR
CDK4
CCND1
MDM2
TERT
BRAF
MITF
PD-L1
NRAS
CD274
This source diff could not be displayed because it is too large. You can view the blob instead.
BRAF 0.9995 CFR 222 perProject_driver
NRAS 0.9965 CFR 104 perProject_driver
TP53 0.007 CFR 60 perProject_driver
CDKN2A 0.0315 CFR 60 perProject_driver
PTEN 0.01 CFR 39 perProject_driver
PPP6C 0.147 CF 36 perProject_driver
IDH1 0.995 CFR 20 perProject_driver
FBXW7 0.0335 CFR 16 perProject_driver
MLL3 0.018 FR 118 perProject_driver
NF1 0 FR 86 perProject_driver
ARID2 0.0185 FR 60 perProject_driver
CTNNB1 0.982 FR 24 perProject_driver
MAP2K1 0.972 CF 24 perProject_driver
CLCC1 0.7855 22 perProject_driver
RAC1 0.8875 CF 20 perProject_driver
CASP8 0.011 FR 14 perProject_driver
ANK3 0.881 F 322 perProject_driver
SVEP1 0.7 F 128 perProject_driver
PCDH18 0.8215 F 115 perProject_driver
MECOM 0.968 F 112 perProject_driver
AHNAK 0.956 F 99 perProject_driver
PLCB1 0.983 87 perProject_driver
BCLAF1 0.9105 F 81 perProject_driver
HDAC9 0.9045 F 81 perProject_driver
COL1A1 0.977 F 67 perProject_driver
ARHGAP29 59 perProject_driver
ASPM 0.95 F 53 perProject_driver
MAGI2 50 perProject_driver
CHD6 0.973 48 perProject_driver
USP6 48 perProject_driver
TRERF1 0.9335 46 perProject_driver
WNK1 44 perProject_driver
TCF4 0.7465 F 43 perProject_driver
C15orf55 0.698 41 perProject_driver
MSR1 0.1325 F 38 perProject_driver
BPTF 0.937 F 38 perProject_driver
BLM 0.3155 F 37 perProject_driver
PIK3C2B 36 perProject_driver
BAZ2B 0.909 F 34 perProject_driver
NCOR1 0.008 F 34 perProject_driver
NUP98 0.7165 F 31 perProject_driver
BRWD1 0.926 F 31 perProject_driver
FCRL4 30 perProject_driver
CHD9 0.7155 F 30 perProject_driver
IRF7 0.807 30 perProject_driver
EIF4G3 0.763 29 perProject_driver
EIF4G1 0.8765 F 29 perProject_driver
ARHGEF2 0.7575 29 perProject_driver
SYK 27 perProject_driver
EIF2AK3 0.038 F 27 perProject_driver
POM121 0.913 27 perProject_driver
FANCI 26 perProject_driver
MAP3K11 26 perProject_driver
ZNF638 24 perProject_driver
CYTH4 24 perProject_driver
LRPPRC 0.101 24 perProject_driver
DDX3X 0.1495 F 24 perProject_driver
STAG1 0.751 F 23 perProject_driver
WASF3 23 perProject_driver
RGS3 0.6595 23 perProject_driver
PER1 23 perProject_driver
ITGA9 0.9665 22 perProject_driver
AHCTF1 0.565 22 perProject_driver
AQR 0.7445 F 21 perProject_driver
MAP3K4 0.7045 F 20 perProject_driver
EZH2 0.179 F 19 perProject_driver
LNPEP 19 perProject_driver
CRTC3 18 perProject_driver
MFNG 0.927 17 perProject_driver
XRN1 0.388 17 perProject_driver
CDK4 17 perProject_driver
RASA2 0.1265 17 perProject_driver
TAOK1 0.577 F 17 perProject_driver
VIM 16 perProject_driver
ERBB3 0.995 16 perProject_driver
TJP2 0.7255 F 15 perProject_driver
MCM3 0.832 14 perProject_driver
SEC24D 0.0175 F 14 perProject_driver
SMURF2 13 perProject_driver
MCM8 0.932 12 perProject_driver
COPS2 0.0355 F 12 perProject_driver
MAT2A 12 perProject_driver
ACSL3 10 perProject_driver
ARHGEF6 0.214 F 10 perProject_driver
JMY 10 perProject_driver
FAF1 0.0145 10 perProject_driver
RHOT1 9 perProject_driver
DNMT3A 0.1995 CFR 13 Pooled_driver
PIK3CA 0.999 CFR 13 Pooled_driver
HRAS 0.922 CFR 8 Pooled_driver
KRAS 0.9995 CFR 6 Pooled_driver
STAG2 0.069 FR 68 Pooled_driver
ATRX 0.0015 FR 33 Pooled_driver
RB1 0.0005 FR 24 Pooled_driver
CIC 0.079 FR 23 Pooled_driver
WT1 0.062 FR 20 Pooled_driver
RHOA 0.7335 FR 19 Pooled_driver
CDKN1A 0.043 FR 15 Pooled_driver
PIK3R1 0.0215 FR 15 Pooled_driver
BAP1 0.0015 FR 15 Pooled_driver
GATA3 0.012 FR 14 Pooled_driver
KDM5C 0.137 FR 14 Pooled_driver
ELF3 0.039 FR 14 Pooled_driver
RUNX1 0.0015 FR 13 Pooled_driver
CDH1 0.0035 FR 11 Pooled_driver
MAP3K1 0.01 FR 11 Pooled_driver
AKT1 0.9945 CF 11 Pooled_driver
ACTG1 0.927 FR 11 Pooled_driver
STK11 0.0025 FR 10 Pooled_driver
SMAD4 0.016 FR 10 Pooled_driver
KDM6A 0.0095 FR 10 Pooled_driver
FUBP1 0.0195 FR 9 Pooled_driver
CTCF 0.007 FR 8 Pooled_driver
RPL5 0.059 FR 8 Pooled_driver
B2M 0.032 FR 8 Pooled_driver
MAP2K4 0.0225 FR 7 Pooled_driver
NFE2L2 0.985 FR 7 Pooled_driver
RPL22 0.089 FR 7 Pooled_driver
SPOP 0.6705 CF 7 Pooled_driver
RASA1 0.0085 FR 7 Pooled_driver
SMAD2 0.0005 FR 7 Pooled_driver
DDX5 0.126 FR 6 Pooled_driver
ACVR2A 0.012 FR 6 Pooled_driver
VHL 0.0105 FR 6 Pooled_driver
IDH2 0.9945 CF 6 Pooled_driver
PHF6 0.0205 FR 5 Pooled_driver
NPM1 0.041 FR 4 Pooled_driver
EIF1AX 0.194 FR 2 Pooled_driver
PIK3CB 0.5845 F 38 Pooled_driver
FN1 0.104 F 38 Pooled_driver
LRP6 0.927 F 36 Pooled_driver
MKL1 0.706 F 34 Pooled_driver
ARFGEF2 0.862 R 31 Pooled_driver
PTGS1 0.0635 F 30 Pooled_driver
ITSN1 0.2875 F 29 Pooled_driver
DICER1 0.0705 F 28 Pooled_driver
ARID1B 0.02 F 27 Pooled_driver
MED24 0.4115 R 26 Pooled_driver
SF3B1 0.9435 F 26 Pooled_driver
NOTCH1 0.0575 F 26 Pooled_driver
AFF4 0.682 F 24 Pooled_driver
EP300 0.0085 F 24 Pooled_driver
SETDB1 0.967 F 23 Pooled_driver
ARHGAP26 0.05 F 23 Pooled_driver
NTN4 0.783 F 22 Pooled_driver
NFATC4 0.1735 F 21 Pooled_driver
CHD3 0.2125 F 21 Pooled_driver
CEP290 0.1625 F 21 Pooled_driver
SOS2 0.8565 F 20 Pooled_driver
NUP107 0.877 F 20 Pooled_driver
FOXP1 0.0035 F 20 Pooled_driver
BRCA1 0.0045 F 19 Pooled_driver
POLR2B 0.865 F 19 Pooled_driver
CLTC 0.8705 F 19 Pooled_driver
CLSPN 0.116 F 18 Pooled_driver
TGFBR2 0.0345 F 18 Pooled_driver
DLG1 0.8765 F 17 Pooled_driver
MAP4K3 0.055 F 16 Pooled_driver
MLH1 0.0335 F 16 Pooled_driver
HDAC3 0.028 F 16 Pooled_driver
BNC2 0.0515 F 16 Pooled_driver
TBX3 0.058 F 15 Pooled_driver
IREB2 0.848 F 15 Pooled_driver
CASP1 0.031 F 15 Pooled_driver
TCF12 0 F 15 Pooled_driver
CLASP2 0.198 F 15 Pooled_driver
NDRG1 0.877 F 14 Pooled_driver
SOS1 0.7365 F 14 Pooled_driver
NCF2 0.57 F 14 Pooled_driver
CCAR1 0.012 F 13 Pooled_driver
NCKAP1 0.2985 F 13 Pooled_driver
ARHGAP35 0.0005 F 13 Pooled_driver
ACTG2 0.769 F 13 Pooled_driver
HLA-A 0.003 F 13 Pooled_driver
FGFR3 0.8115 F 12 Pooled_driver
DHX15 0.0075 F 12 Pooled_driver
RTN4 0.1965 F 12 Pooled_driver
MEN1 0.0285 F 12 Pooled_driver
SYNCRIP 0.396 F 12 Pooled_driver
NR4A2 0.191 F 12 Pooled_driver
KLF6 0.15 F 11 Pooled_driver
SMC1A 0.8855 F 11 Pooled_driver
TNPO1 0.694 F 11 Pooled_driver
CNOT3 0.9095 F 11 Pooled_driver
CAT 0.836 F 11 Pooled_driver
CHD1L 0.658 F 11 Pooled_driver
CAST 0.0135 F 11 Pooled_driver
EIF4A2 0.836 F 11 Pooled_driver
NF2 0.0415 F 11 Pooled_driver
PAX5 0.966 F 11 Pooled_driver
ZNF814 0.904 C 11 Pooled_driver
CUL1 0.213 F 10 Pooled_driver
ZC3H11A 0.3245 F 10 Pooled_driver
CYLD 0.035 F 10 Pooled_driver
AHR 0.876 F 10 Pooled_driver
PRRX1 0.9445 F 10 Pooled_driver
PIK3R3 0.919 F 10 Pooled_driver
ZMYM2 0.201 F 10 Pooled_driver
ACO1 0.254 F 10 Pooled_driver
ACVR1B 0.187 F 10 Pooled_driver
ATIC 0.0305 F 10 Pooled_driver
ERBB2IP 0.2055 F 9 Pooled_driver
FXR1 0.1815 F 9 Pooled_driver
CDC73 0.5795 F 9 Pooled_driver
CCT5 0.8805 F 9 Pooled_driver
TFDP1 0.922 F 9 Pooled_driver
FAS 0.0895 F 8 Pooled_driver
CUL3 0.1075 F 8 Pooled_driver
MED17 0.8765 F 8 Pooled_driver
PPP2R5C 0.966 F 8 Pooled_driver
PSMA6 0.4425 F 8 Pooled_driver
WIPF1 0.836 F 8 Pooled_driver
LDHA 0.229 F 8 Pooled_driver
CLOCK 0.126 F 8 Pooled_driver
LCP1 0.014 F 8 Pooled_driver
PIP5K1A 0.547 F 8 Pooled_driver
RAD21 0.286 R 8 Pooled_driver
CHEK2 0.2895 F 8 Pooled_driver
BMPR2 0.0405 F 8 Pooled_driver
HLA-B 0.0645 F 8 Pooled_driver
PPP2R5A 0.048 F 7 Pooled_driver
DIS3 0.8525 F 7 Pooled_driver
CUL2 0.207 F 7 Pooled_driver
HSPA8 0.768 F 7 Pooled_driver
TFDP2 0.123 F 7 Pooled_driver
KLF4 0.686 F 7 Pooled_driver
CSNK1G3 0.0235 F 7 Pooled_driver
ZFP36L2 0.0055 F 7 Pooled_driver
GOLGA5 0.596 F 6 Pooled_driver
ARFGAP1 0.4275 F 6 Pooled_driver
FMR1 0.1195 F 6 Pooled_driver
RHEB 0.886 F 6 Pooled_driver
ATF1 0.622 F 6 Pooled_driver
G3BP2 0.882 F 6 Pooled_driver
SUZ12 0.003 F 6 Pooled_driver
SF3A3 0.09 F 6 Pooled_driver
NR2F2 0.8885 F 6 Pooled_driver
SFPQ 0.0455 F 5 Pooled_driver
RAD23B 0.1235 F 5 Pooled_driver
SOX9 0.0655 F 5 Pooled_driver
GNG2 0.0205 F 5 Pooled_driver
CSDA 0.8635 F 4 Pooled_driver
YBX1 0.8715 F 4 Pooled_driver
CBFB 0.0055 F 4 Pooled_driver
PPP2R1A 0.895 F 4 Pooled_driver
HLF 0.8865 F 4 Pooled_driver
ELF1 0.0395 F 4 Pooled_driver
RBBP7 0.886 F 3 Pooled_driver
HNRPDL 0.0195 F 3 Pooled_driver
ATP6AP2 0.0415 F 2 Pooled_driver
DRIVER_LABEL GENE SYMBOL MUTS_CS MUTS_PAM_SAMPLES POOL_SIGNALS CANCER_TYPE KNOWN_DRIVER MUTS_CS_SAMPLES SIGNAL_COUNT MUTS_PAM SIGNALS ONCODRIVE_ROLE
perProject_driver ENSG00000157764 BRAF 222 206 CFR CM True 209 3 216 CFR 0.9995
perProject_driver ENSG00000213281 NRAS 104 100 CFR CM True 103 3 101 CFR 0.9965
perProject_driver ENSG00000141510 TP53 60 53 CFR CM True 56 3 57 FR 0.006999999999999999
perProject_driver ENSG00000147889 CDKN2A 60 56 CFR CM True 58 3 57 CFR 0.0315
perProject_driver ENSG00000171862 PTEN 39 36 CFR CM True 38 3 37 F 0.01
perProject_driver ENSG00000119414 PPP6C 36 31 CF CM 34 3 32 CFR 0.147
perProject_driver ENSG00000138413 IDH1 20 15 CFR CM True 19 3 15 CF 0.995
perProject_driver ENSG00000109670 FBXW7 16 12 CFR CM True 12 3 14 F 0.0335
perProject_driver ENSG00000055609 MLL3 118 72 FR CM True 88 2 91 F 0.018000000000000002
perProject_driver ENSG00000196712 NF1 86 51 FR CM True 64 2 69 F 0.0
perProject_driver ENSG00000189079 ARID2 60 44 FR CM True 47 2 56 F 0.0185
perProject_driver ENSG00000168036 CTNNB1 24 19 FR CM True 24 2 19 F 0.982
perProject_driver ENSG00000169032 MAP2K1 24 19 CF CM 24 2 19 CF 0.972
perProject_driver ENSG00000121940 CLCC1 22 16 CM 21 2 17 CR 0.7855
perProject_driver ENSG00000136238 RAC1 20 14 CF CM 20 2 14 CF 0.8875
perProject_driver ENSG00000064012 CASP8 14 8 FR CM 13 2 8 F 0.011000000000000001
perProject_driver ENSG00000151150 ANK3 322 123 F CM 152 1 232 F 0.8809999999999999
perProject_driver ENSG00000165124 SVEP1 128 72 F CM 89 1 95 F 0.7
perProject_driver ENSG00000189184 PCDH18 115 69 F CM 84 1 85 F 0.8215
perProject_driver ENSG00000085276 MECOM 112 67 F CM True 82 1 83 F 0.968
perProject_driver ENSG00000124942 AHNAK 99 54 F CM 76 1 66 F 0.956
perProject_driver ENSG00000182621 PLCB1 87 51 CM 64 1 65 F 0.983
perProject_driver ENSG00000029363 BCLAF1 81 56 F CM 64 1 69 F 0.9105
perProject_driver ENSG00000048052 HDAC9 81 43 F CM 58 1 53 F 0.9045
perProject_driver ENSG00000108821 COL1A1 67 46 F CM True 58 1 49 F 0.977
perProject_driver ENSG00000137962 ARHGAP29 59 37 CM 48 1 41 F
perProject_driver ENSG00000066279 ASPM 53 35 F CM 45 1 40 F 0.95
perProject_driver ENSG00000187391 MAGI2 50 31 CM 41 1 32 F
perProject_driver ENSG00000124177 CHD6 48 37 CM 43 1 39 F 0.973
perProject_driver ENSG00000129204 USP6 48 35 CM True 43 1 40 F
perProject_driver ENSG00000124496 TRERF1 46 26 CM 35 1 31 F 0.9335
perProject_driver ENSG00000060237 WNK1 44 22 CM 35 1 28 F
perProject_driver ENSG00000196628 TCF4 43 28 F CM 37 1 32 F 0.7465
perProject_driver ENSG00000184507 C15orf55 41 24 CM True 32 1 26 F 0.698
perProject_driver ENSG00000038945 MSR1 38 25 F CM 33 1 28 F 0.1325
perProject_driver ENSG00000171634 BPTF 38 33 F CM 36 1 34 F 0.937
perProject_driver ENSG00000197299 BLM 37 24 F CM True 28 1 31 F 0.3155
perProject_driver ENSG00000133056 PIK3C2B 36 17 CM 30 1 18 C
perProject_driver ENSG00000123636 BAZ2B 34 27 F CM 34 1 27 F 0.909
perProject_driver ENSG00000141027 NCOR1 34 23 F CM 32 1 24 F 0.008
perProject_driver ENSG00000110713 NUP98 31 18 F CM True 28 1 20 F 0.7165
perProject_driver ENSG00000185658 BRWD1 31 21 F CM 26 1 24 F 0.9259999999999999
perProject_driver ENSG00000163518 FCRL4 30 19 CM True 27 1 20 C
perProject_driver ENSG00000177200 CHD9 30 23 F CM 26 1 25 F 0.7155
perProject_driver ENSG00000185507 IRF7 30 10 CM 27 1 10 F 0.807
perProject_driver ENSG00000075151 EIF4G3 29 21 CM 26 1 23 F 0.763
perProject_driver ENSG00000114867 EIF4G1 29 17 F CM 24 1 18 F 0.8765
perProject_driver ENSG00000116584 ARHGEF2 29 18 CM 28 1 18 F 0.7575
perProject_driver ENSG00000165025 SYK 27 18 CM True 25 1 20 F
perProject_driver ENSG00000172071 EIF2AK3 27 21 F CM 22 1 24 F 0.038
perProject_driver ENSG00000196313 POM121 27 17 CM 25 1 19 F 0.9129999999999999
perProject_driver ENSG00000140525 FANCI 26 14 CM 24 1 15 C
perProject_driver ENSG00000173327 MAP3K11 26 10 CM 21 1 10 C
perProject_driver ENSG00000075292 ZNF638 24 17 CM 24 1 17 F
perProject_driver ENSG00000100055 CYTH4 24 10 CM 21 1 11 F
perProject_driver ENSG00000138095 LRPPRC 24 17 CM 19 1 20 F 0.10099999999999999
perProject_driver ENSG00000215301 DDX3X 24 24 F CM 24 1 24 F 0.1495
perProject_driver ENSG00000118007 STAG1 23 20 F CM 22 1 20 F 0.7509999999999999
perProject_driver ENSG00000132970 WASF3 23 15 CM 21 1 16 C
perProject_driver ENSG00000138835 RGS3 23 15 CM 23 1 15 C 0.6595
perProject_driver ENSG00000179094 PER1 23 12 CM True 22 1 12 F
perProject_driver ENSG00000144668 ITGA9 22 17 CM 20 1 18 F 0.9665
perProject_driver ENSG00000153207 AHCTF1 22 16 CM 22 1 16 F 0.565
perProject_driver ENSG00000021776 AQR 21 16 F CM 20 1 16 F 0.7445
perProject_driver ENSG00000085511 MAP3K4 20 16 F CM 20 1 16 F 0.7045
perProject_driver ENSG00000106462 EZH2 19 18 F CM True 19 1 18 F 0.179
perProject_driver ENSG00000113441 LNPEP 19 12 CM 13 1 15 F
perProject_driver ENSG00000140577 CRTC3 18 13 CM True 16 1 14 C
perProject_driver ENSG00000100060 MFNG 17 14 CM 17 1 14 F 0.927
perProject_driver ENSG00000114127 XRN1 17 15 CM 17 1 15 F 0.38799999999999996
perProject_driver ENSG00000135446 CDK4 17 11 CM True 17 1 11 C
perProject_driver ENSG00000155903 RASA2 17 15 CM 17 1 15 F 0.1265
perProject_driver ENSG00000160551 TAOK1 17 15 F CM 17 1 15 F 0.5770000000000001
perProject_driver ENSG00000026025 VIM 16 12 CM 14 1 12 C
perProject_driver ENSG00000065361 ERBB3 16 10 CM 14 1 11 F 0.995
perProject_driver ENSG00000119139 TJP2 15 13 F CM 14 1 13 F 0.7255
perProject_driver ENSG00000112118 MCM3 14 9 CM 12 1 10 F 0.8320000000000001
perProject_driver ENSG00000150961 SEC24D 14 12 F CM 14 1 12 F 0.0175
perProject_driver ENSG00000108854 SMURF2 13 11 CM 12 1 12 F
perProject_driver ENSG00000125885 MCM8 12 10 CM 11 1 10 F 0.932
perProject_driver ENSG00000166200 COPS2 12 8 F CM 11 1 8 F 0.0355
perProject_driver ENSG00000168906 MAT2A 12 7 CM 12 1 7 F
perProject_driver ENSG00000123983 ACSL3 10 8 CM True 9 1 8 F
perProject_driver ENSG00000129675 ARHGEF6 10 10 F CM 10 1 10 F 0.214
perProject_driver ENSG00000152409 JMY 10 7 CM 10 1 7 C
perProject_driver ENSG00000185104 FAF1 10 9 CM 10 1 9 F 0.0145
perProject_driver ENSG00000126858 RHOT1 9 9 CM 9 1 9 F
Pooled_driver ENSG00000119772 DNMT3A 13 7 CFR CM True 12 3 7 0.1995
Pooled_driver ENSG00000121879 PIK3CA 13 11 CFR CM True 13 3 11 0.9990000000000001
Pooled_driver ENSG00000174775 HRAS 8 5 CFR CM True 7 3 5 0.922
Pooled_driver ENSG00000133703 KRAS 6 3 CFR CM True 5 3 4 0.9995
Pooled_driver ENSG00000101972 STAG2 68 13 FR CM 57 2 13 0.069
Pooled_driver ENSG00000085224 ATRX 33 21 FR CM True 26 2 27 0.0015
Pooled_driver ENSG00000139687 RB1 24 14 FR CM True 21 2 17 0.0005
Pooled_driver ENSG00000079432 CIC 23 14 FR CM True 21 2 14 0.079
Pooled_driver ENSG00000184937 WT1 20 9 FR CM True 18 2 10 0.062
Pooled_driver ENSG00000067560 RHOA 19 4 FR CM 19 2 4 0.7335
Pooled_driver ENSG00000124762 CDKN1A 15 5 FR CM 14 2 5 0.043
Pooled_driver ENSG00000145675 PIK3R1 15 8 FR CM True 13 2 9 0.0215
Pooled_driver ENSG00000163930 BAP1 15 7 FR CM True 15 2 7 0.0015
Pooled_driver ENSG00000107485 GATA3 14 9 FR CM True 12 2 9 0.012
Pooled_driver ENSG00000126012 KDM5C 14 6 FR CM True 12 2 6 0.13699999999999998
Pooled_driver ENSG00000163435 ELF3 14 2 FR CM 12 2 2 0.039
Pooled_driver ENSG00000159216 RUNX1 13 2 FR CM True 12 2 2 0.0015
Pooled_driver ENSG00000039068 CDH1 11 7 FR CM True 11 2 7 0.0035
Pooled_driver ENSG00000095015 MAP3K1 11 9 FR CM 11 2 9 0.01
Pooled_driver ENSG00000142208 AKT1 11 4 CF CM True 11 2 4 0.9945
Pooled_driver ENSG00000184009 ACTG1 11 8 FR CM 11 2 8 0.927
Pooled_driver ENSG00000118046 STK11 10 4 FR CM True 8 2 4 0.0025
Pooled_driver ENSG00000141646 SMAD4 10 4 FR CM True 10 2 4 0.016
Pooled_driver ENSG00000147050 KDM6A 10 5 FR CM True 7 2 8 0.0095
Pooled_driver ENSG00000162613 FUBP1 9 6 FR CM True 9 2 6 0.0195
Pooled_driver ENSG00000102974 CTCF 8 3 FR CM 8 2 3 0.006999999999999999
Pooled_driver ENSG00000122406 RPL5 8 6 FR CM 8 2 6 0.059000000000000004
Pooled_driver ENSG00000166710 B2M 8 7 FR CM 8 2 7 0.032
Pooled_driver ENSG00000065559 MAP2K4 7 7 FR CM True 7 2 7 0.0225
Pooled_driver ENSG00000116044 NFE2L2 7 4 FR CM True 6 2 5 0.985
Pooled_driver ENSG00000116251 RPL22 7 3 FR CM True 7 2 3 0.08900000000000001
Pooled_driver ENSG00000121067 SPOP 7 3 CF CM 7 2 3 0.6705
Pooled_driver ENSG00000145715 RASA1 7 5 FR CM 6 2 6 0.0085
Pooled_driver ENSG00000175387 SMAD2 7 4 FR CM 6 2 4 0.0005
Pooled_driver ENSG00000108654 DDX5 6 3 FR CM True 6 2 3 0.126
Pooled_driver ENSG00000121989 ACVR2A 6 4 FR CM 5 2 5 0.012
Pooled_driver ENSG00000134086 VHL 6 5 FR CM True 6 2 5 0.0105
Pooled_driver ENSG00000182054 IDH2 6 6 CF CM True 6 2 6 0.9945
Pooled_driver ENSG00000156531 PHF6 5 3 FR CM True 5 2 3 0.0205
Pooled_driver ENSG00000181163 NPM1 4 3 FR CM True 4 2 3 0.040999999999999995
Pooled_driver ENSG00000173674 EIF1AX 2 2 FR CM 2 2 2 0.19399999999999998
Pooled_driver ENSG00000051382 PIK3CB 38 24 F CM 32 1 30 0.5845
Pooled_driver ENSG00000115414 FN1 38 22 F CM 34 1 24 0.10400000000000001
Pooled_driver ENSG00000070018 LRP6 36 23 F CM 30 1 26 0.927
Pooled_driver ENSG00000196588 MKL1 34 15 F CM True 27 1 15 0.706
Pooled_driver ENSG00000124198 ARFGEF2 31 20 R CM 28 1 23 0.862
Pooled_driver ENSG00000095303 PTGS1 30 18 F CM 27 1 19 0.0635
Pooled_driver ENSG00000205726 ITSN1 29 16 F CM 26 1 17 0.2875
Pooled_driver ENSG00000100697 DICER1 28 23 F CM True 28 1 23 0.0705
Pooled_driver ENSG00000049618 ARID1B 27 17 F CM 24 1 19 0.02
Pooled_driver ENSG00000008838 MED24 26 7 R CM 21 1 7 0.4115
Pooled_driver ENSG00000115524 SF3B1 26 18 F CM True 24 1 20 0.9435
Pooled_driver ENSG00000148400 NOTCH1 26 10 F CM True 25 1 10 0.0575
Pooled_driver ENSG00000072364 AFF4 24 18 F CM True 23 1 18 0.682
Pooled_driver ENSG00000100393 EP300 24 17 F CM True 24 1 17 0.0085
Pooled_driver ENSG00000143379 SETDB1 23 11 F CM 20 1 13 0.9670000000000001
Pooled_driver ENSG00000145819 ARHGAP26 23 11 F CM True 20 1 11 0.05
Pooled_driver ENSG00000074527 NTN4 22 14 F CM 21 1 14 0.7829999999999999
Pooled_driver ENSG00000100968 NFATC4 21 14 F CM 20 1 15 0.1735
Pooled_driver ENSG00000170004 CHD3 21 14 F CM 19 1 14 0.2125
Pooled_driver ENSG00000198707 CEP290 21 16 F CM 20 1 17 0.1625
Pooled_driver ENSG00000100485 SOS2 20 13 F CM 19 1 14 0.8565
Pooled_driver ENSG00000111581 NUP107 20 8 F CM 16 1 8 0.877
Pooled_driver ENSG00000114861 FOXP1 20 12 F CM True 18 1 12 0.0035
Pooled_driver ENSG00000012048 BRCA1 19 11 F CM True 18 1 11 0.0045
Pooled_driver ENSG00000047315 POLR2B 19 12 F CM 17 1 14 0.865
Pooled_driver ENSG00000141367 CLTC 19 9 F CM True 17 1 9 0.8705
Pooled_driver ENSG00000092853 CLSPN 18 8 F CM 16 1 9 0.11599999999999999
Pooled_driver ENSG00000163513 TGFBR2 18 12 F CM 17 1 13 0.0345
Pooled_driver ENSG00000075711 DLG1 17 10 F CM 15 1 11 0.8765
Pooled_driver ENSG00000011566 MAP4K3 16 9 F CM 14 1 9 0.055
Pooled_driver ENSG00000076242 MLH1 16 5 F CM True 13 1 5 0.0335
Pooled_driver ENSG00000171720 HDAC3 16 8 F CM 16 1 8 0.027999999999999997
Pooled_driver ENSG00000173068 BNC2 16 11 F CM 16 1 11 0.0515
Pooled_driver ENSG00000135111 TBX3 15 7 F CM 15 1 7 0.057999999999999996
Pooled_driver ENSG00000136381 IREB2 15 12 F CM 14 1 13 0.848
Pooled_driver ENSG00000137752 CASP1 15 4 F CM 13 1 4 0.031
Pooled_driver ENSG00000140262 TCF12 15 12 F CM True 15 1 12 0.0
Pooled_driver ENSG00000163539 CLASP2 15 11 F CM 15 1 11 0.198
Pooled_driver ENSG00000104419 NDRG1 14 10 F CM True 13 1 10 0.877
Pooled_driver ENSG00000115904 SOS1 14 11 F CM 14 1 11 0.7365
Pooled_driver ENSG00000116701 NCF2 14 9 F CM 14 1 9 0.57
Pooled_driver ENSG00000060339 CCAR1 13 8 F CM 12 1 9 0.012
Pooled_driver ENSG00000061676 NCKAP1 13 9 F CM 13 1 9 0.2985
Pooled_driver ENSG00000160007 ARHGAP35 13 11 F CM 11 1 13 0.0005
Pooled_driver ENSG00000163017 ACTG2 13 6 F CM 13 1 6 0.769
Pooled_driver ENSG00000206503 HLA-A 13 5 F CM 13 1 5 0.003
Pooled_driver ENSG00000068078 FGFR3 12 10 F CM True 12 1 10 0.8115
Pooled_driver ENSG00000109606 DHX15 12 7 F CM 12 1 7 0.0075
Pooled_driver ENSG00000115310 RTN4 12 10 F CM 11 1 11 0.1965
Pooled_driver ENSG00000133895 MEN1 12 6 F CM True 12 1 6 0.0285
Pooled_driver ENSG00000135316 SYNCRIP 12 4 F CM 11 1 4 0.396
Pooled_driver ENSG00000153234 NR4A2 12 7 F CM 10 1 7 0.191
Pooled_driver ENSG00000067082 KLF6 11 5 F CM True 10 1 5 0.15
Pooled_driver ENSG00000072501 SMC1A 11 8 F CM 10 1 8 0.8855
Pooled_driver ENSG00000083312 TNPO1 11 8 F CM 11 1 8 0.6940000000000001
Pooled_driver ENSG00000088038 CNOT3 11 6 F CM 11 1 6 0.9095
Pooled_driver ENSG00000121691 CAT 11 9 F CM 10 1 9 0.836
Pooled_driver ENSG00000131778 CHD1L 11 11 F CM 11 1 11 0.6579999999999999
Pooled_driver ENSG00000153113 CAST 11 7 F CM 11 1 7 0.0135
Pooled_driver ENSG00000156976 EIF4A2 11 4 F CM True 11 1 4 0.836
Pooled_driver ENSG00000186575 NF2 11 8 F CM True 11 1 8 0.0415
Pooled_driver ENSG00000196092 PAX5 11 6 F CM True 11 1 6 0.966
Pooled_driver ENSG00000204514 ZNF814 11 4 C CM 10 1 5 0.904
Pooled_driver ENSG00000055130 CUL1 10 8 F CM 10 1 8 0.213
Pooled_driver ENSG00000058673 ZC3H11A 10 6 F CM 9 1 6 0.3245
Pooled_driver ENSG00000083799 CYLD 10 8 F CM True 10 1 8 0.035
Pooled_driver ENSG00000106546 AHR 10 8 F CM 10 1 8 0.8759999999999999
Pooled_driver ENSG00000116132 PRRX1 10 6 F CM True 10 1 6 0.9445
Pooled_driver ENSG00000117461 PIK3R3 10 3 F CM 10 1 3 0.919
Pooled_driver ENSG00000121741 ZMYM2 10 8 F CM True 10 1 8 0.201
Pooled_driver ENSG00000122729 ACO1 10 7 F CM 10 1 7 0.254
Pooled_driver ENSG00000135503 ACVR1B 10 5 F CM 10 1 5 0.187
Pooled_driver ENSG00000138363 ATIC 10 8 F CM True 10 1 8 0.0305
Pooled_driver ENSG00000112851 ERBB2IP 9 2 F CM 7 1 4 0.2055
Pooled_driver ENSG00000114416 FXR1 9 5 F CM 9 1 5 0.1815
Pooled_driver ENSG00000134371 CDC73 9 6 F CM True 9 1 6 0.5795
Pooled_driver ENSG00000150753 CCT5 9 4 F CM 8 1 4 0.8805
Pooled_driver ENSG00000198176 TFDP1 9 4 F CM 9 1 4 0.922
Pooled_driver ENSG00000026103 FAS 8 7 F CM True 7 1 7 0.0895
Pooled_driver ENSG00000036257 CUL3 8 4 F CM 8 1 4 0.1075
Pooled_driver ENSG00000042429 MED17 8 6 F CM 8 1 6 0.8765
Pooled_driver ENSG00000078304 PPP2R5C 8 7 F CM 7 1 8 0.966
Pooled_driver ENSG00000100902 PSMA6 8 5 F CM 8 1 5 0.4425
Pooled_driver ENSG00000115935 WIPF1 8 4 F CM 8 1 4 0.836
Pooled_driver ENSG00000134333 LDHA 8 7 F CM 8 1 7 0.22899999999999998
Pooled_driver ENSG00000134852 CLOCK 8 5 F CM 8 1 5 0.126
Pooled_driver ENSG00000136167 LCP1 8 5 F CM True 8 1 5 0.013999999999999999
Pooled_driver ENSG00000143398 PIP5K1A 8 4 F CM 7 1 4 0.547
Pooled_driver ENSG00000164754 RAD21 8 5 R CM 8 1 5 0.28600000000000003
Pooled_driver ENSG00000183765 CHEK2 8 4 F CM True 8 1 4 0.2895
Pooled_driver ENSG00000204217 BMPR2 8 7 F CM 8 1 7 0.0405
Pooled_driver ENSG00000234745 HLA-B 8 6 F CM 8 1 6 0.0645
Pooled_driver ENSG00000066027 PPP2R5A 7 5 F CM 7 1 5 0.048
Pooled_driver ENSG00000083520 DIS3 7 5 F CM 7 1 5 0.8525
Pooled_driver ENSG00000108094 CUL2 7 6 F CM 7 1 6 0.207
Pooled_driver ENSG00000109971 HSPA8 7 4 F CM 6 1 5 0.768
Pooled_driver ENSG00000114126 TFDP2 7 5 F CM 7 1 5 0.12300000000000001
Pooled_driver ENSG00000136826 KLF4 7 5 F CM 7 1 5 0.6859999999999999
Pooled_driver ENSG00000151292 CSNK1G3 7 6 F CM 7 1 6 0.0235
Pooled_driver ENSG00000152518 ZFP36L2 7 5 F CM 7 1 5 0.0055
Pooled_driver ENSG00000066455 GOLGA5 6 2 F CM True 6 1 2 0.596
Pooled_driver ENSG00000101199 ARFGAP1 6 3 F CM 6 1 3 0.4275
Pooled_driver ENSG00000102081 FMR1 6 5 F CM 6 1 5 0.1195
Pooled_driver ENSG00000106615 RHEB 6 2 F CM 6 1 2 0.8859999999999999
Pooled_driver ENSG00000123268 ATF1 6 5 F CM True 6 1 5 0.622
Pooled_driver ENSG00000138757 G3BP2 6 3 F CM 6 1 3 0.882
Pooled_driver ENSG00000178691 SUZ12 6 3 F CM True 6 1 3 0.003
Pooled_driver ENSG00000183431 SF3A3 6 5 F CM 6 1 5 0.09
Pooled_driver ENSG00000185551 NR2F2 6 4 F CM 5 1 5 0.8885
Pooled_driver ENSG00000116560 SFPQ 5 5 F CM True 5 1 5 0.0455
Pooled_driver ENSG00000119318 RAD23B 5 5 F CM 5 1 5 0.1235
Pooled_driver ENSG00000125398 SOX9 5 3 F CM 5 1 3 0.0655
Pooled_driver ENSG00000186469 GNG2 5 3 F CM 5 1 3 0.0205
Pooled_driver ENSG00000060138 CSDA 4 2 F CM 4 1 2 0.8635
Pooled_driver ENSG00000065978 YBX1 4 3 F CM 4 1 3 0.8715
Pooled_driver ENSG00000067955 CBFB 4 3 F CM True 4 1 3 0.0055
Pooled_driver ENSG00000105568 PPP2R1A 4 2 F CM True 4 1 2 0.895
Pooled_driver ENSG00000108924 HLF 4 3 F CM True 4 1 3 0.8865
Pooled_driver ENSG00000120690 ELF1 4 4 F CM 4 1 4 0.0395
Pooled_driver ENSG00000102054 RBBP7 3 2 F CM 3 1 2 0.8859999999999999
Pooled_driver ENSG00000152795 HNRPDL 3 2 F CM 3 1 2 0.0195
Pooled_driver ENSG00000182220 ATP6AP2 2 2 F CM 2 1 2 0.0415
from Circos import InputCircos, bed_to_circos, make_CIRCOS_legend
from Circos import InputCircos, bed_to_circos, make_CIRCOS_legend, get_plot_region
from ReportTools import make_bed_genes
from PIL import Image
import os.path
rule make_CIRCOS_input:
"""Make input files for making a CIRCOS plot."""
......@@ -9,20 +11,20 @@ rule make_CIRCOS_input:
rubic_gains="RUBIC/gains.txt",
rubic_losses="RUBIC/losses.txt"
output:
seg="Reports/Circos/Segments.txt",
gistic="Reports/Circos/GISTIC_results.txt",
rubic="Reports/Circos/RUBIC_results.txt",
seg="Circos/Segments.txt",
gistic="Circos/GISTIC_results.txt",
rubic="Circos/RUBIC_results.txt",
run:
InputCircos(input.seg, input.gistic, input.rubic_gains, input.rubic_losses, output.seg, output.gistic, output.rubic)
rule make_CIRCOS_plot:
rule plot_CIRCOS:
"""Make CIRCOS plot of recurrent regions in RUBIC and GISTIC2.0"""
input:
seg="Reports/Circos/Segments.txt",
gistic="Reports/Circos/GISTIC_results.txt",
rubic="Reports/Circos/RUBIC_results.txt",
seg="Circos/Segments.txt",
gistic="Circos/GISTIC_results.txt",
rubic="Circos/RUBIC_results.txt",
output:
"Reports/Circos/RecurrentRegions.png"
"Circos/RecurrentRegions.png"
params:
conf=workflow.basedir + "/scripts/circos/circos.conf"
conda:
......@@ -33,72 +35,72 @@ rule make_CIRCOS_plot:
rule add_legend_CIRCOS:
"""Add a custom legend to the CIRCOS plot."""
input:
circos="Reports/Circos/RecurrentRegions.png",
circos="Circos/RecurrentRegions.png",
output:
legend="Reports/Circos/legend.png",
circos="Reports/Circos/RecurrentRegions_legend.png"
legend="Circos/legend.png",
circos="Circos/RecurrentRegions_legend.png"
run:
make_CIRCOS_legend(input.circos, output.legend, output.circos)
rule make_CIRCOS_zoom_input: #necessary?
"""Make input files for making a circos diagram."""
rule make_bed_genes_census:
input:
bed="Reports/Overlap_known_genes.bed"
gene_file=os.path.join(workflow.basedir, config["census_genes"])
output:
gistic="Reports/Circos/Zoom/GISTIC.txt",
rubic="Reports/Circos/Zoom/RUBIC.txt",
genes="Reports/Circos/Zoom/Genes.txt",
bed="Reports/Locations_census_genes.bed"
params:
ref=os.path.join(workflow.basedir, config["reference"]),
biomart=os.path.join(workflow.basedir, config["biomart_genes"])
run:
bed_to_circos(input.bed, output.rubic, output.gistic, output.genes)
make_bed_genes(input.gene_file, params.biomart, output.bed, params.ref)
def get_list_genes(overlapping_genes, locations_known_genes):
    """Return the names of known genes whose location matches an overlapping region.

    Parameters
    ----------
    overlapping_genes : str
        Path to a tab-separated file with three columns per line
        (chromosome, start, end) and no header. Any leading/trailing
        'h'/'s' characters are stripped from the chromosome and "chr" is
        prepended (e.g. "hs1" becomes "chr1").
    locations_known_genes : str
        Path to a tab-separated file whose first line is a header (skipped),
        followed by four columns per line (chromosome, start, end, gene name).

    Returns
    -------
    list of str
        Gene names, in the order they occur in ``locations_known_genes``,
        for every row whose (chromosome, start, end) triple is also present
        in ``overlapping_genes``. Coordinates are compared as text, so they
        must be formatted identically in both files.

    Raises
    ------
    ValueError
        If a non-blank line does not have the expected number of columns.
    """
    # A set of tuples gives O(1) membership tests instead of scanning a list
    # for every known gene.
    overlapping_regions = set()
    with open(overlapping_genes, 'r') as plot_genes:
        for line in plot_genes:
            record = line.strip()
            if not record:
                # Blank lines (e.g. a trailing newline) carry no region.
                continue
            chrom, start, end = record.split("\t")
            overlapping_regions.add(("chr" + chrom.strip("hs"), start, end))

    plot_list = []
    with open(locations_known_genes, 'r') as known_genes:
        known_genes.readline()  # skip the header line
        for line in known_genes:
            record = line.strip()
            if not record:
                continue
            chrom, start, end, gene_name = record.split("\t")
            if (chrom, start, end) in overlapping_regions:
                plot_list.append(gene_name)
    return plot_list
rule make_bed_genes_known:
input: