Commit 62e0c530 authored by San Leon Granado's avatar San Leon Granado
Browse files

Upload New File

parent 49f96bbd
---
title: "R Notebook"
output: html_notebook
---
```{r}
library(pheatmap)
library(ChIPseeker)
library(ComplexHeatmap)
library(TxDb.Mmusculus.UCSC.mm10.ensGene)
library( "gplots" )
library( "RColorBrewer" )
txdb <- TxDb.Mmusculus.UCSC.mm10.ensGene
```
# Annotation UCSC known genes 22-03-2019
```{r}
txdb=makeTxDbFromUCSC(genome="mm10")
```
```{r}
annotatePeakToDf=function(annotatepeakobject){
annoDf=as.data.frame(annotatepeakobject@anno@elementMetadata@listData)
annoDf$annoGroup=factor(ifelse(grepl("exon",annoDf$annotation),"exon",
ifelse(grepl("intron",annoDf$annotation),"intron",
ifelse(grepl("Downstream",annoDf$annotation),"Downstream",
ifelse(grepl("Promoter",annoDf$annotation),"Promoter",
as.character(annoDf$annotation)
)
)
)
)
)
return(annoDf)
}
```
# Expression RNA vs ChIPseq and methylation
We load the expression data
```{r}
expression=read.table("../data/Zbtb24vsWT_serum_genderAdjusted.csv",sep="\t",header=TRUE)
```
# Plots for RNAseq vs DMRs
And the list of genes with a DMR (Zbtb24 hypermethylated)
```{r}
Zbtb24DMRhyper=read.table("../data/HyperGenes.txt")
Zbtb24DMRHyperPromotor=read.table("../data/myList2.txt")
Zbtb24DMR=read.table("../data/WTvsZbtb24.DSS.dmrs.csv",sep="\t",header=TRUE)
```
I annotate the peaks with Chipseeker to obtain the genes with the Promotor annotated DMRs
```{r echo=FALSE}
peakAnno <- annotatePeak("../data/Zbtb24vsWT.hyper.bed", tssRegion=c(-2000, 2000),
TxDb=txdb, annoDb="org.Mm.eg.db")
peakAnno.df=annotatePeakToDf(peakAnno)
```
And if we annotate hypomethylated DMRs
```{r echo=FALSE}
peakAnnoHypo <- annotatePeak("../data/Zbtb24DMRHypo.bed", tssRegion=c(-2000, 2000),
TxDb=txdb, annoDb="org.Mm.eg.db")
peakAnnoHypo.df=annotatePeakToDf(peakAnnoHypo)
```
Now, we are going to plot a heatmap with the expression levels over the genes with a DMR in the promotor, only genes with expression are included
```{r }
Zbtb24DMR_promotor=peakAnno.df[peakAnno.df$annoGroup=="Promoter",]
expressionGenesDMR=expression[!is.na(expression$padj) & expression$gene%in%Zbtb24DMR_promotor$SYMBOL,]
row.names(expressionGenesDMR)=expressionGenesDMR$gene
pheatmap(expressionGenesDMR[,c("normS001","normS002","normS003","normS004","normS005","normS006")],scale="row")
pdf("../images/RNASEQ.heatmap.expression.ALLgenes.With.promoterDMR.pdf",width = 2,height = 10)
pheatmap(expressionGenesDMR[,c("normS001","normS002","normS003","normS004","normS005","normS006")],scale="row",show_rownames = FALSE)
dev.off()
```
And the same but with Hypo
```{r }
Zbtb24DMRHypo_promotor=peakAnnoHypo.df[peakAnnoHypo.df$annoGroup=="Promoter",]
expressionGenesHypoDMR=expression[!is.na(expression$padj) & expression$gene%in%Zbtb24DMRHypo_promotor$SYMBOL,]
row.names(expressionGenesHypoDMR)=expressionGenesHypoDMR$gene
pheatmap(expressionGenesHypoDMR[,c("normS001","normS002","normS003","normS004","normS005","normS006")],scale="row")
pdf("../images/RNASEQ.heatmap.expression.ALLgenes.With.promoterDMRHypo.pdf",width = 2,height = 5)
pheatmap(expressionGenesHypoDMR[,c("normS001","normS002","normS003","normS004","normS005","normS006")],scale="row",show_rownames = FALSE)
dev.off()
```
Now, we filter the genes, we select only the differential expressed genes
```{r }
expressionGenesDMR$typeDMR="Hypermethylated"
expressionGenesHypoDMR$typeDMR="Hypomethylated"
expressionGenesDMR=rbind(expressionGenesDMR,expressionGenesHypoDMR)
expressionGenesDMR.significant=expressionGenesDMR[expressionGenesDMR$padj<0.05 & abs(expressionGenesDMR$log2FoldChange)>=1,]
rowAnnotation=data.frame(row.names=row.names(expressionGenesDMR.significant),typeDMR=expressionGenesDMR.significant$typeDMR)
ann_colors = list(
typeDMR = brewer.pal(3, "Set1")[1:2]
)
names(ann_colors$typeDMR)=unique(expressionGenesDMR$typeDMR)
pheatmap(expressionGenesDMR.significant[,c("normS001","normS002","normS003","normS004","normS005","normS006")],
scale="row",
annotation_row =rowAnnotation,
annotation_colors = ann_colors)
pdf("../images/RNASEQ.heatmap.expression.DEgenes.With.promoterHypoHyperDMR.pdf",width = 5,height = 3)
pheatmap(expressionGenesDMR.significant[,c("normS001","normS002","normS003","normS004","normS005","normS006")],scale="row",annotation_row =rowAnnotation )
dev.off()
```
Just in case, I will check if the DMRs are over other regions (genebody), it affects the expression
```{r}
Zbtb24DMR_promotor_geneBody=peakAnno.df[peakAnno.df$annoGroup!="Distal Intergenic" & peakAnno.df$annoGroup!="Downstream",]
Zbtb24HypoDMR_promotor_geneBody=peakAnno.df[peakAnnoHypo.df$annoGroup!="Distal Intergenic" & peakAnnoHypo.df$annoGroup!="Downstream",]
expressionGenesDMRBody=expression[!is.na(expression$padj) & expression$gene%in%Zbtb24DMR_promotor_geneBody$SYMBOL,]
row.names(expressionGenesDMRBody)=expressionGenesDMRBody$gene
expressionGenesDMRBody$typeDMR="Hypermethylated"
expressionGenesHypoDMRBody=expression[!is.na(expression$padj) & expression$gene%in%Zbtb24HypoDMR_promotor_geneBody$SYMBOL,]
row.names(expressionGenesHypoDMRBody)=expressionGenesHypoDMRBody$gene
expressionGenesHypoDMRBody$typeDMR="Hypomethylated"
expressionGenesDMRBody=rbind(expressionGenesDMRBody,expressionGenesHypoDMRBody)
rowAnnotation_genebody=data.frame(row.names=row.names(expressionGenesDMRBody),typeDMR=expressionGenesDMRBody$typeDMR)
pdf("../images/RNASEQ.heatmap.expression.ALLgenes.With.promoter.body.DMR.pdf",width = 2,height = 10)
pheatmap(expressionGenesDMRBody[,c("normS001","normS002","normS003","normS004","normS005","normS006")],scale="row",show_rownames = FALSE)
dev.off()
expressionGenesDMRBody.DE=expressionGenesDMRBody[expressionGenesDMRBody$padj<0.05 & abs(expressionGenesDMRBody$log2FoldChange)>=1,]
rowAnnotation_genebody.DE=data.frame(row.names=row.names(expressionGenesDMRBody.DE),typeDMR=expressionGenesDMRBody.DE$typeDMR)
pdf("../images/RNASEQ.heatmap.expression.DEgenes.With.promoter.body.DMR.pdf",width = 2,height = 5)
pheatmap(expressionGenesDMRBody.DE[,c("normS001","normS002","normS003","normS004","normS005","normS006")],
scale="row",
show_rownames = FALSE,
annotation_row =rowAnnotation_genebody.DE)
dev.off()
pdf("../images/RNASEQ.heatmap.expression.DEgenes.With.promoter.body.DMR.pdf",width = 3,height = 25)
pheatmap(expressionGenesDMRBody[expressionGenesDMRBody$padj<0.05,c("normS001","normS002","normS003","normS004","normS005","normS006")],scale="row")
dev.off()
```
I want to see if the RNASEQ samples cluster together when I check the genes with DMR
```{r}
sampleDists <- dist( t( expressionGenesDMRBody[,c("normS001","normS002","normS003","normS004","normS005","normS006")] ) )
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- c("WT_rep1","WT_rep2","WT_rep3","Zbtb24_rep1","Zbtb24_rep2","Zbtb24_rep3" )
colnames(sampleDistMatrix) <- NULL
colours = colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pdf("../images/RNASEQ.heatmap.samples.ALLgenes.With.promoter.body.DMR.pdf")
heatmap( sampleDistMatrix, col=colours)
dev.off()
sampleDists <- dist( t( expressionGenesDMR[,c("normS001","normS002","normS003","normS004","normS005","normS006")] ) )
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- c("WT_rep1","WT_rep2","WT_rep3","Zbtb24_rep1","Zbtb24_rep2","Zbtb24_rep3" )
colnames(sampleDistMatrix) <- NULL
colours = colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pdf("../images/RNASEQ.heatmap.samples.ALLgenes.With.promoter.DMR.pdf")
heatmap( sampleDistMatrix, col=colours)
dev.off()
```
# ChIPseq vs RNAseq (-1Kb-peak region-1Kb)
The next step is try to see the expression over the genes with a ChIP-seq peak.
The first ste is to annotate the peaks to extract the genes.
```{r}
peakAnnoChIP <- annotatePeak("../data/Zbtb24mESCtrack.bed", tssRegion=c(-2000, 2000),
TxDb=txdb, annoDb="org.Mm.eg.db")
peakAnnoChIP.df=annotatePeakToDf(peakAnnoChIP)
```
The second step is filter the list to include only peaks over PROMOTER
```{r}
Zbtb24ChIP_promotor=peakAnnoChIP.df[peakAnnoChIP.df$annoGroup=="Promoter",]
expressionGenesChIP=expression[!is.na(expression$padj) & expression$gene%in%c(as.character(Zbtb24ChIP_promotor$SYMBOL),"Camkmt","Wasf1","Chd1"),]
row.names(expressionGenesChIP)=expressionGenesChIP$gene
```
I read the annotation and methylation measures over the -1kb peak +1kb region
```{r}
methlevel=read.table("../data/methAverage.csv",sep="\t",header=TRUE)
methlevel.genes=methlevel[methlevel$gene!="",]
methlevel.genes$totalDiff=methlevel.genes$NumberHyper.15..+methlevel.genes$NumberHypo.15..
methlevel.genes$totalDiff5=methlevel.genes$NumberHyper.5..+methlevel.genes$NumberHypo.5..
methlevel.genes.annot=data.frame(row.names=methlevel.genes$gene,
percentageHyper15=methlevel.genes$NumberHyper.15../methlevel.genes$totalDiff*100,
percentageHypo15=methlevel.genes$NumberHypo.15../methlevel.genes$totalDiff*100,
percentageHyper5=methlevel.genes$NumberHyper.5../methlevel.genes$totalDiff5*100,
percentageHypo5=methlevel.genes$NumberHypo.5../methlevel.genes$totalDiff5*100,
nHyper15=methlevel.genes$NumberHyper.15..,
nHypo15=methlevel.genes$NumberHypo.15..,
nHyper5=methlevel.genes$NumberHyper.5..,
nHypo5=methlevel.genes$NumberHypo.5..,
deltamethAverage=methlevel.genes$methAverage,
region=methlevel.genes$Position
)
methlevel.genes.annot=methlevel.genes.annot[order(row.names(methlevel.genes.annot)),]
ann_colors = list(
percentageHyper15 = colorRampPalette(rev(brewer.pal(n = 7, name = "Reds")))(n = 100),
percentageHypo15 = colorRampPalette(rev(brewer.pal(n = 7, name = "Reds")))(n = 100),
percentageHyper5 = colorRampPalette(rev(brewer.pal(n = 7, name = "Blues")))(n = 100),
percentageHypo5 = colorRampPalette(rev(brewer.pal(n = 7, name = "Blues")))(n = 100)
)
```
And now I plot the heatmap with genes with a peak in the promotor region
```{r}
my_palette <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(n = 100)
pheatmap(expressionGenesChIP[,c("normS001","normS002","normS003","normS004","normS005","normS006")],
color=my_palette,
scale="row",
annotation_row = methlevel.genes.annot,
annotation_colors = ann_colors
)
pdf("../images/RNASEQ.heatmap.expression.ALLgenes.With.promoterChIPseq.pdf",width = 5,height = 7)
pheatmap(expressionGenesChIP[,c("normS001","normS002","normS003","normS004","normS005","normS006")],
color=my_palette,
scale="row",
annotation_row = methlevel.genes.annot,
annotation_colors = ann_colors
)
dev.off()
```
And now the genes with overlapping peaks (promoter and gene body), in the case of Riok2 , in the plot is Chd1.
Other important thing, there two cases where the peak affects two genes.
```{r}
#Zbtb24ChIP_promotor_body=peakAnnoChIP.df[peakAnnoChIP.df$annoGroup!="Distal Intergenic" & peakAnnoChIP.df$annoGroup!="Downstream",]
expressionGenesChIP_body=expression[expression$gene%in%as.character(methlevel.genes$gene),]
row.names(expressionGenesChIP_body)=expressionGenesChIP_body$gene
expressionGenesChIP_body=expressionGenesChIP_body[order(expressionGenesChIP_body$gene),]
methlevel.genes=methlevel.genes[order(methlevel.genes$gene),]
```
Heatmap with peaks over promotor and gene body
```{r}
pheatmap(expressionGenesChIP_body[,c("normS001","normS002","normS003","normS004","normS005","normS006")],
scale="row",
annotation_row = methlevel.genes.annot,annotation_colors = ann_colors )
pdf("../images/RNASEQ.heatmap.expression.ALLgenes.With.promoter.body.ChIPseq.pdf",width = 6,height = 7)
pheatmap(expressionGenesChIP_body[,c("normS001","normS002","normS003","normS004","normS005","normS006")],
scale="row",
annotation_row = methlevel.genes.annot,annotation_colors = ann_colors )
dev.off()
```
The same but with ComplexHeatmap
```{r}
library(ComplexHeatmap)
library(circlize)
col_fun = colorRamp2(c(0, 50, 100), c("blue", "white", "red"))
col_fun2 = colorRamp2(c(-5, 0, 10), c("blue", "white", "red"))
col_fun3=c("Promoter"="goldenrod1","3UTR"="darkseagreen2","intron"="chartreuse3","exon"="deepskyblue3")
methlevel.genes=methlevel.genes[order(methlevel.genes$gene),]
Heatmap.annot=rowAnnotation(barDiff15=anno_barplot(cbind(methlevel.genes$NumberHyper.15../methlevel.genes$totalDiff*100,
methlevel.genes$NumberHypo.15../methlevel.genes$totalDiff*100),
gp = gpar(fill = 2:3, col = 2:3)),
position=methlevel.genes$Position,
diffMethAverage=methlevel.genes$methAverage,
col = list(diffMethAverage=col_fun2,position=col_fun3),
annotation_legend_param=list(diffMethAverage=list(at=c(-3,0,5,10,15)))
)
ha = HeatmapAnnotation(foo = anno_barplot(cbind(1:10, 10:1),
gp = gpar(fill = 2:3, col = 2:3)))
samples.annot=columnAnnotation(condition=c("WT","WT","WT","Zbtb24","Zbtb24","Zbtb24"))
data=as.matrix(expressionGenesChIP_body[,c("normS001","normS002","normS003","normS004","normS005","normS006")])
base_mean = rowMeans(data)
mat_scaled = t(apply(data, 1, scale))
pdf("../images/RNASEQ.heatmap.ChIPseq.COmplexHeatmap.pdf",width = 3.5,height = 5)
Heatmap(mat_scaled,left_annotation=Heatmap.annot,col=colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(n = 100))
decorate_annotation("barDiff15", {
grid.lines(c(0, 100), unit(c(50, 50), "native"), gp = gpar(lty = 2, col = "blue"))
})
dev.off()
```
# GSEA analysis between RNAseq and DMRs
```{r}
library(clusterProfiler)
entrez=bitr(expression$EnsemblID,from="ENSEMBL",to="ENTREZID",OrgDb="org.Mm.eg.db")
expression$entrez=sapply(expression$EnsemblID,function(x){entrez[x,"ENTREZID"][1]})
expression$rank=-log10(expression$pvalue)*expression$log2FoldChange
expressionOnlyEntrez=expression[!is.na(expression$pvalue) & !is.na(expression$entrez),]
expressionOnlyEntrez.agg=aggregate(expressionOnlyEntrez$log2FoldChange,by=list(expressionOnlyEntrez$entrez),mean)
myExpressedList=expressionOnlyEntrez.agg$x
names(myExpressedList)=as.character(expressionOnlyEntrez.agg$Group.1)
myExpressedList=sort(myExpressedList,decreasing = TRUE)
Hyper2gene=bitr(Zbtb24DMR_promotor$ENSEMBL,from="ENSEMBL",to="ENTREZID",OrgDb="org.Mm.eg.db")
Hyper2gene$DMR="Hypermethylated"
DMR2gene=Hyper2gene[, c("DMR", "ENTREZID")]
DMR2gene=unique(DMR2gene)
DMR2name=DMR2gene[, c("DMR", "DMR")]
```
And now we plot it
```{r}
y = GSEA(myExpressedList, TERM2GENE=DMR2gene, TERM2NAME=DMR2name,pvalueCutoff = 1)
gseaplot(y, "Hypermethylated")
pdf("../images/GSEA.hyperDMR.sortedByFC.pdf")
gseaplot(y, "Hypermethylated")
dev.off()
expressionOnlyEntrez2.agg=aggregate(expressionOnlyEntrez$rank,by=list(expressionOnlyEntrez$entrez),mean)
myExpressedList2=expressionOnlyEntrez2.agg$x
names(myExpressedList2)=as.character(expressionOnlyEntrez2.agg$Group.1)
myExpressedList2=sort(myExpressedList2,decreasing = TRUE)
y2 = GSEA(myExpressedList2, TERM2GENE=DMR2gene, TERM2NAME=DMR2name,pvalueCutoff = 1)
gseaplot(y2, "Hypermethylated")
```
If we use other type of ranking: -log10(pvalue)*log(FC)
```{r}
expressionOnlyEntrez2.agg=aggregate(expressionOnlyEntrez$rank,by=list(expressionOnlyEntrez$entrez),mean)
myExpressedList2=expressionOnlyEntrez2.agg$x
names(myExpressedList2)=as.character(expressionOnlyEntrez2.agg$Group.1)
myExpressedList2=sort(myExpressedList2,decreasing = TRUE)
y2 = GSEA(myExpressedList2, TERM2GENE=DMR2gene, TERM2NAME=DMR2name,pvalueCutoff = 1)
gseaplot(y2, "Hypermethylated")
```
```{r}
Hyper2geneBody=bitr(Zbtb24DMR_promotor_geneBody$ENSEMBL,from="ENSEMBL",to="ENTREZID",OrgDb="org.Mm.eg.db")
Hyper2geneBody$DMR="Hypermethylated"
DMR2geneBody=Hyper2geneBody[, c("DMR", "ENTREZID")]
DMR2geneBody=unique(DMR2geneBody)
DMR2nameBody=DMR2geneBody[, c("DMR", "DMR")]
yBody2 = GSEA(myExpressedList, TERM2GENE=DMR2geneBody, TERM2NAME=DMR2name,pvalueCutoff = 1)
y2Body2 = GSEA(myExpressedList2, TERM2GENE=DMR2geneBody, TERM2NAME=DMR2nameBody,pvalueCutoff = 1)
gseaplot(yBody2, "Hypermethylated")
gseaplot(y2Body2, "Hypermethylated")
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment