Commit 2d3389cc authored by Beatrice Tan's avatar Beatrice Tan

Removed ensembl function.

parent f7bc787b
#Directories to be specified
workdir: /home/bftan/CNA_results #directory to write output
gisticdir: /home/bftan/Tools/GISTIC2 #directory to install GISTIC2
#workdir: /home/bftan/CNA_results #directory to write output
#gisticdir: /home/bftan/Tools/GISTIC2 #directory to install GISTIC2
#workdir: /home/beatrice/CNA_analysis
#gisticdir: /home/beatrice/CNA_analysis/run_gistic2
workdir: /home/beatrice/CNA_analysis
gisticdir: /home/beatrice/CNA_analysis/run_gistic2
#Input details to download from firehose
cancer_type: SKCM
......
......@@ -46,13 +46,30 @@ def parse_gistic_results(results_file, known_genes, census_genes):
start, end = bp.split("-")
cnv_type = 'amp' if stats[0].startswith("Amplification") else 'del'
qval = log10(float(stats[5])) * -1 #Convert q-value to log10(q-value)
list_genes = genes_locus(chrom, int(start), int(end))
#list_genes = genes_locus(chrom, int(start), int(end))
result_dir, conf = results_file.split("all_lesions")
gene_file = os.path.join(result_dir, cnv_type + "_genes" + conf)
list_genes = gistic_genes(gene_file, CNV_id)
known_overlap = set(list_genes).intersection(set(known_genes))
census_overlap = set(list_genes).intersection(set(census_genes))
cnv_data = [chrom, start, end, cnv_type, qval, list(known_overlap), list(census_overlap), list_genes]
regions.append(cnv_data)
return regions
def gistic_genes(gene_file, CNV_id):
"""Extract list of genes for a given recurrent CNV from GISTIC2 output."""
out_genes = []
with open(gene_file, 'r') as genes:
for i in range(3):
genes.readline()
column_CNV = genes.readline().split("\t").index(CNV_id)
for line in genes:
if len(line.split("\t")) >= column_CNV:
gene = line.split("\t")[column_CNV]
if gene != '':
out_genes.append(gene)
return out_genes
def parse_rubic_results(results_gains, results_losses, known_genes, census_genes):
"""Create a list of lists with the recurrent regions called by GISTIC2."""
regions = []
......@@ -63,21 +80,23 @@ def parse_rubic_results(results_gains, results_losses, known_genes, census_genes
chrom, start, end, qval_left, qval_right, genes = lesion.split("\t")
CNV_id = "chr" + chrom + ":" + start + "-" + end
cnv_type = 'amp' if result_file.endswith('gains.txt') else 'del'
list_genes = genes_locus(chrom, int(start), int(end))
#list_genes = genes_locus(chrom, int(start), int(end))
list_genes = genes.strip().split(",")
print(list_genes)
known_overlap = set(list_genes).intersection(set(known_genes))
census_overlap = set(list_genes).intersection(set(census_genes))
cnv_data = [chrom, start, end, cnv_type, (qval_left, qval_right), list(known_overlap), list(census_overlap), list_genes]
regions.append(cnv_data)
return regions
def genes_locus(chrom, start, end):
"""Get list of gene IDs at certain location."""
IDs = []
gene_info = ensembl.genes_at_locus(chrom, start, end)
for gene in gene_info:
ID = gene.gene_id
IDs.append(ID)
return IDs
#def genes_locus(chrom, start, end):
# """Get list of gene IDs at certain location."""
# IDs = []
# gene_info = ensembl.genes_at_locus(chrom, start, end)
# for gene in gene_info:
# ID = gene.gene_id
# IDs.append(ID)
# return IDs
def gene_file_IDs(gene_file, top_nr):
"""Parse a file with a list of genes (one gene per line or first column).
......@@ -90,11 +109,12 @@ def gene_file_IDs(gene_file, top_nr):
continue
else:
gene = line.split("\t")[0]
try:
gene_ID = ensembl.gene_ids_of_gene_name(gene)
list_genes = list_genes + gene_ID
except:
pass
list_genes.append(gene)
#try:
# gene_ID = ensembl.gene_ids_of_gene_name(gene)
# list_genes = list_genes + gene_ID
#except:
# pass
if top_nr != np.inf:
list_genes = list_genes[0:top_nr]
return list_genes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment