Commit 9b73d366 authored by Beatrice Tan's avatar Beatrice Tan

Added option to use all gene ontologies for GO enrichment analysis.

parent b17d4862
......@@ -6,7 +6,7 @@ rule go_analysis:
go="GO/{tool}.txt"
params:
organism="hsapiens", #default is human
ontology="MF" #MF, BP or CC
ontology="BP" #MF, BP, CC or all
benchmark:
"Benchmarks/topGO." + str(datetime.datetime.now()) + ".txt"
wrapper:
......@@ -15,11 +15,11 @@ rule go_analysis:
rule compare_enriched_terms:
"""Compare the top 50 GO terms detected by RUBIC and GISTIC2.0"""
input:
expand("GO/{tool}.txt", tool=["GISTIC2", "RUBIC"])
go=expand("GO/{tool}.txt", tool=["GISTIC2", "RUBIC"])
output:
"GO/comparison.txt"
run:
compare_go(input[0], output[0])
compare_go(input.go, output[0])
##Rules to compare top 50 GO terms.
......@@ -27,9 +27,8 @@ def compare_go(GO_files, output_file):
"""Report enriched GO terms detected by both tools"""
go_terms = get_go(GO_files)
overlap = set(go_terms[0]).intersection(set(go_terms[1]))
print(overlap)
print("Percentage overlapping: " + str(len(overlap) / 50 * 100))
with open(output_file, 'r') as out:
with open(output_file, 'w') as out:
out.write("Percentage overlapping: " + str(int(len(overlap) / 50 * 100)))
out.write("\n".join(list(overlap)))
def get_go(GO_files):
......@@ -40,7 +39,7 @@ def get_go(GO_files):
with open(tool_file, 'r') as tool:
tool.readline()
for line in tool:
go_term = line.split('" "')
go_terms_tool += go_term
go_terms += go_terms_tool
go_term = line.split(' ')[1].strip('"')
go_terms_tool += [go_term]
go_terms.append(go_terms_tool)
return go_terms
......@@ -8,7 +8,7 @@ rule install_gistic:
output:
config["gisticdir"]
shell:
"{workflow.basedir}/rules/install_gistic2.sh {output}"
"{workflow.basedir}/scripts/install_gistic2.sh {output}"
rule run_gistic:
"""Run GISTIC2 for the tumor segmentation data."""
......@@ -23,7 +23,7 @@ rule run_gistic:
ref_file="",
extra=""
benchmark:
"Benchmarks/GISTIC2." + str(datetime.datetime.now()) + ".txt"
"Benchmarks/GISTIC2." + str(datetime.datetime.now()) + ".txt" #space between date and time!
wrapper:
"file:" + workflow.basedir + "/wrappers/GISTIC2"
......
......@@ -25,9 +25,12 @@ genesOfInterest <- as.character(genesOfInterest$V1)
geneList <- factor(as.integer(geneUniverse %in% genesOfInterest))
names(geneList) <- geneUniverse
#Select ontology
ontologies <- if (snakemake@params[["ontology"]] == "all") c("BP", "CC", "MF") else snakemake@params[["ontology"]]
#Combine data
GOdata <- new("topGOdata",
ontology=snakemake@params[["ontology"]],
ontology=ontologies,
allGenes = geneList,
annot = annFUN.gene2GO,
gene2GO = geneID2GO)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment