G:\r\tcga_example-master\scripts
library(survival)
library(survminer)
## 批量生存分析 使用 coxph 回归方法
# http://www.sthda.com/english/wiki/cox-proportional-hazards-model
colnames(phe)
head(phe) #event中1 表示存活(终点事件)
#https://rpubs.com/kaz_yos/survival-auc
mySurv=with(phe,Surv(time, event)) # ## Create a survival vector
head(mySurv) #surv的结果只是一列一连串的生存时间汇总。所以一般会提倡用phe$mysurv这种方式把结果整合到原始数据框中便于之后的分析
#生存曲线与结果汇总
ggsurvplot(survfit(mySurv ~ 1, data = phe))
ggsurvplot(survfit(mySurv ~ age_group, data = phe), label.curves = list(method = "arrow", cex = 1.2))
library(survival)
data(pbc)
pbc <- within(pbc, {
## transplant (1) and death (2) are considered events, and marked 1
event <- as.numeric(status %in% c(1,2))
## Create a survival vector
Surv <- Surv(time, event)
})
head(pbc)
ggsurvplot(survfit(Surv ~ sex, data = pbc), label.curves = list(method = "arrow", cex = 1.2))
ggsurvplot(survfit(Surv ~ albumin >= median(albumin), data = pbc), label.curves = list(method = "arrow", cex = 1.2))
exprSet[1:3,1:3]
#https://zhuanlan.zhihu.com/p/130316068
cox_results <-apply(exprSet , 1 , function(gene){
group=ifelse(gene>median(gene),‘high’,‘low’)
survival_dat <- data.frame(group=group,stage=phe
s
t
a
g
e
,
a
g
e
=
p
h
e
stage,age=phe
stage,age=pheage, #新建存活矩阵,试图阐明某个基因的高低表达对生存率的影响
gender=phe$gender,
stringsAsFactors = F)
m=coxph(mySurv ~ gender + age + stage+ group, data = survival_dat) #矫正其他因素
beta <- coef(m)
se <- sqrt(diag(vcov(m)))
HR <- exp(beta)
HRse <- HR * se
#summary(m)
tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1),
HR = HR, HRse = HRse,
HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1),
HRCILL = exp(beta - qnorm(.975, 0, 1) * se),
HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3)
return(tmp[‘grouplow’,])
})
cox_results=t(cox_results)
head(cox_results)
table(cox_results[,4]<0.05)
cox_results[cox_results[,4]<0.05,] #coxph 回归方法得到p<0.05的所有miRNA, 接下来与logrank test 得到的结果对比一下
mySurv=with(phe,Surv(time, event))
log_rank_p <- apply(exprSet , 1 , function(gene){
phe
g
r
o
u
p
=
i
f
e
l
s
e
(
g
e
n
e
>
m
e
d
i
a
n
(
g
e
n
e
)
,
′
h
i
g
h
′
,
′
l
o
w
′
)
d
a
t
a
.
s
u
r
v
d
i
f
f
=
s
u
r
v
d
i
f
f
(
m
y
S
u
r
v
g
r
o
u
p
,
d
a
t
a
=
p
h
e
)
p
.
v
a
l
=
1
−
p
c
h
i
s
q
(
d
a
t
a
.
s
u
r
v
d
i
f
f
group=ifelse(gene>median(gene),'high','low') data.survdiff=survdiff(mySurv~group,data=phe) p.val = 1 - pchisq(data.survdiff
group=ifelse(gene>median(gene),′high′,′low′)data.survdiff=survdiff(mySurv group,data=phe)p.val=1−pchisq(data.survdiffchisq, length(data.survdiff$n) - 1)
return(p.val)
})
require(“VennDiagram”)
VENN.LIST=list(cox=rownames(cox_results[cox_results[,4]<0.05,]),#coxph 回归方法得到p<0.05的所有miRNA
log=names(log_rank_p[log_rank_p<0.05])) #logrank test 得到的p<0.05的所有miRNA
venn.plot <- venn.diagram(VENN.LIST , NULL,
fill=c(“darkmagenta”, “darkblue”),
alpha=c(0.5,0.5), cex = 2,
cat.fontface=4,
main=“overlap of coxph and log-rank test”)
grid.draw(venn.plot)
save(log_rank_p,cox_results ,
file =
file.path(Rdata_dir,‘TCGA-KIRC-miRNA-survival_results.Rdata’)
)