source: trunk/ontoCAT/src/uk/ac/ebi/ontocat/examples/R/Example1.R @ 464

Revision 464, 3.4 KB checked in by nata_courby, 2 years ago (diff)

New R package version in examples

Line 
1#   Copyright (c) 2010 - 2011 European Molecular Biology Laboratory
2#
3#   Licensed under the Apache License, Version 2.0 (the "License");
4#   you may not use this file except in compliance with the License.
5#   You may obtain a copy of the License at
6#
7#       http://www.apache.org/licenses/LICENSE-2.0
8#
9#   Unless required by applicable law or agreed to in writing, software
10#   distributed under the License is distributed on an "AS IS" BASIS,
11#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12#   See the License for the specific language governing permissions and
13#   limitations under the License.
14
15##########################################################
16# Gene enrichment test by using ontoCAT R package
17# Please use the full version of ontoCAT R package: https://sourceforge.net/projects/ontocat/files/ontoCAT/ontoCAT_R/ontoCAT_1.2.1.tar.gz
18##########################################################
19
20#Java Heap size needed to reason over GO ontology (more than 20 MB in size) is 512MB.
21#Here are the instructions how to increase Java Heap Size in R:
22
23library(rJava)
24options(java.parameters="-Xmx512")
25.jinit()
26
27#To check the result:
28.jcall(.jnew("java/lang/Runtime"), "J", "maxMemory")
29#Now it is possible to work with large ontologies like GO
30
31library(RCurl)
32library(rjson)
33
34# Define a function to retrieve some genes from Gene Expression Atlas (http://www.ebi.ac.uk/gxa/)
35options(GXAAPIURL="http://www.ebi.ac.uk/gxa/api?")
36queryGeneUp <- function(query) fromJSON(getURL(paste(getOption("GXAAPIURL"), "upIn=", query, sep = "")))
37
38# Create a list of over-expressed genes in pancreas vs other tissues
39g <- queryGeneUp("pancreas")
40
41# g$results is a list of 100 genes, each accessible by g$results[[i]]$gene
42names(g$results[[10]]$gene)
43
44# Get GO ontology
45library(ontoCAT)
46
47# Obtain GO ontology
48go <- getOntology("http://www.geneontology.org/ontology/obo_format_1_2/gene_ontology_ext.obo")
49
50# Obtain the list of all GOIDs using biomaRt
51library(biomaRt)
52mart <- useMart("ensembl")
53mart<-useDataset("hsapiens_gene_ensembl",mart)
54genes <- getBM(attributes=c("ensembl_gene_id","go_biological_process_id"), mart=mart)
55all.GOIDs <- unique(genes[,2])
56all.GOIDs <- all.GOIDs[2:length(all.GOIDs)]
57
58# Create a list of over-expressed genes
59genes.of.interest <- c(sapply(1:100,function(x) g$results[[x]]$gene$ensemblGeneId),recursive = TRUE)
60selection <- genes[which(genes[,1] %in% genes.of.interest),]
61
62# Create a list of genes for each GO ID
63genes.by.GOID <- sapply(1:length(all.GOIDs),function(x) genes[which(genes[,2] == all.GOIDs[x]),][1]$ensembl_gene_id)
64names(genes.by.GOID)=all.GOIDs
65
66# Create a list containing the number of genes in each category
67size.by.GOID<-lapply(genes.by.GOID,length)
68
69# Construct a list containing the overlap for each GO category with the genes of interest
70#by using ontoCAT library to get all children (subclasses) of particular GO term
71overlaps = lapply(names(genes.by.GOID), function(x) length(which(selection[,2] %in%
72gsub("GO_*", "GO:", c(lapply(getTermAndAllChildrenById(go,toString(gsub("GO:*", "GO_",x))),function(t) {getAccession(t)}),recursive = TRUE))
73)))
74
75# Create a list of hypergeometric p-values
76pvals = as.array(sapply(1:length(genes.by.GOID), function(x)
77    phyper(overlaps[[x]]-1,
78    size.by.GOID[[x]],
79    length(all.GOIDs) - size.by.GOID[[x]],
80    length(unique(selection[,1])),
81    lower.tail=FALSE)))
82names(pvals) =  names(genes.by.GOID)
83
84# Get the most enriched categories:
85pvals[order(pvals)][1:10]
86
87
88
89
Note: See TracBrowser for help on using the repository browser.