R語言農藝性狀表型數據與環境關聯互作代碼分析

發布時間：2022-03-21 10:03:41 來源：億速云閱讀：363 作者：iii 欄目：開發技術

這篇文章主要介紹了R語言農藝性狀表型數據與環境關聯互作代碼分析的相關知識，內容詳細易懂，操作簡單快捷，具有一定借鑒價值，相信大家閱讀完這篇R語言農藝性狀表型數據與環境關聯互作代碼分析文章都會有所收獲，下面我們一起來看看吧。

農藝性狀表型數據與環境關聯互作分析

local({r <- getOption("repos")  
r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/"   
options(repos=r)}) 
options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")
#install.packages("GGEBiplots")
#install.packages("GGEBiplotGUI")
library(GGEBiplotGUI)
library("GGEBiplots")
library("reshape2")
library(vegan)
library(corrplot)
library("psych")
library(Cairo)
setwd("D:/potato/")
data1<-read.table("張北data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")
data.t<-as.data.frame(t(data1))
data.l<-melt(data.t,id.vars = c('年度', '性狀'),variable.name = "基因型")
mydata1<-dcast(data.l, `年度` + `基因型` ~ `性狀`)
#`單株產量` +   `淀粉含量（%）` + `干物質含量（%）` +  `平均出苗率（%）`+ `生育日數(天)`+ `株高`
data2<-read.table("康保data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")
data.t<-as.data.frame(t(data2))
data.l<-melt(data.t,id.vars = c('年度', '性狀'),variable.name = "基因型")
mydata2<-dcast(data.l, `年度` + `基因型` ~ `性狀`)
#`單株產量` +   `淀粉含量（%）` + `干物質含量（%）` +  `平均出苗率（%）`+ `生育日數(天)` +   `株高`
#inter<-intersect(colnames(mydata1),colnames(mydata2))
#去掉一些數據
inter<-c("年度"   ,           "基因型"    ,        "單株產量"    ,     
 "淀粉含量(%)"    ,   "干物質含量(%)"  ,   "平均出苗率(%)"  ,  
"生育日數(天)"   ,  "株高" )
mydata1.c<-mydata1[,inter]
mydata2.c<-mydata2[,inter]
mydata1.c$`環境`="張北"
mydata2.c$`環境`="康保"
mydata=rbind(mydata1.c,mydata2.c)
########################計算平均值
#traits<-c("單株產量" , "淀粉含量(%)" , "干物質含量(%)" , "平均出苗率(%)", "生育日數(天)" , "小區產量" ,"折合單位產量kg/hm", "株高")

traits=c(   "單株產量"  ,        "淀粉含量(%)"  ,    
            "干物質含量(%)"  ,   "平均出苗率(%)" ,   
            "生育日數(天)"    ,       
            "株高"      )
for (i in traits){
  mydata[,i]=as.double(mydata[,i])
}
SD=round(apply(mydata[,traits],2,sd),2)
MEAN=round(apply(mydata[,traits],2,mean),2)
RANGE=round(apply(mydata[,traits],2,range),2)
CV=round(SD/MEAN*100,2)
H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2)
trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數（%）`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多樣性指數 H’`=H)
write.csv(trait.mean,file="01.馬鈴薯表型性狀統計.csv")

#################################相關性分析
occor = corr.test(mydata[,traits],use="pairwise",method="spearman",adjust="BH",alpha=.05)
occor.r = occor$r  # 取相關性矩陣R值
occor.p = occor$p  # 取相關性矩陣p值
#設置色板
palette_2 <- colorRampPalette(c("yellow","red"))(100)
#繪圖
#png(file="traits_cor.png",w=10*300,h=10*300,res = 300)
pdf(file="traits_cor.pdf",w=10,h=10,family="GB1")
corrplot(occor.r, method = "square", type = "lower", order="AOE",
         col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r",
         title = "Traits corr", mar = c(2,4,4,1))
corrplot(occor.r, method = "number", type = "upper",order="AOE",
         col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n", 
         number.cex=1,mar = c(2,4,4,1))
dev.off()
write.csv(occor.p,file="02.相關性P值.csv")
write.csv(occor.r,file="03.相關性R值.csv")
########################分布范圍柱狀圖
#########################求平均
genotype.mean=aggregate(mydata[,traits],by=list(`基因型`=mydata$`基因型`),mean)



for(t in traits){
  
  #t="平均出苗率(%)"
  d<-  genotype.mean[,t]
  ff=gsub("\\(\\S+\\)", "", t)
  ff=gsub("kg/hm", "", ff)
  
  
  pdf(paste0(ff,"-表型性狀值分布.pdf"),w=5,h=5,family="GB1")
  par(mar=c(6,4.1,2,4))
  xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10),
                plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="")
  
  xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10),
                plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="")
  
  mtext(t,side=1,line=4.5)
  axis(side=1,xhist$mids,labels=F)
  
  b=(xhist$mids[2]-xhist$mids[1])/2
  s=round(xhist$mids-b,2)
  s[1]=0
  e=round(xhist$mids+b,2)
  lab=paste(s,e,sep="-")
  
  text(x=xhist$mids, y=-3, labels=lab,cex=1,  xpd=TRUE, srt=45,adj=1)
  
  dd=density(d)
  
  
  
  par(new=T)
  plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "") 
  axis(4, las=1,at=seq(from = 0, to = max(xhist$density), length.out = 6), labels=round(seq(0, max(xhist$density),length.out =6)/sum(xhist$density),2), col = 'red', col.axis = 'red')
  mtext("頻率",side=4,line=3,col="red")
  box(bty="l")
  dev.off()
}
##############GGEbioplots
ggdata=mydata[,c("環境","基因型","單株產量")]
ggdata1=aggregate(`單株產量` ~ 基因型 + 環境, data = mydata, mean)
gg.f=dcast(ggdata1,基因型  ~環境)
row.names(gg.f)=gg.f[,1]
gg.f=gg.f[,-1]*10
#GGEBiplot(Data = gg.f)
library(ggplot2)
GGE1<-GGEModel(gg.f,centering = "tester",
               SVP = "symmetrical",
               scaling = "none")
g1=GGEPlot(GGE1,type=6,sizeGen=4,sizeEnv = 2,largeSize=3)
g1=g1+ labs(title = "")
pdf(paste0("適應性分析bioplot.pdf"),w=5,h=5,family="GB1")
print(g1)
dev.off()

g2=GGEPlot(GGE1,type=9,sizeGen=2,sizeEnv = 3,largeSize=3)
g2=g2+ labs(title = "")
pdf(paste0("豐產性-穩定性分析bioplot.pdf"),w=5,h=5,family="GB1")
print(g2)
dev.off()

#################################表型主成分分析
p=genotype.mean
row.names(p)=p[,1]
p=p[,-1]
pca = prcomp(t(p), scale=TRUE)
#pca = prcomp(t(decostand(p, "hellinger")), scale=TRUE)
ss=summary(pca)
pc=pca$x
xx=ss$importance
row.names(xx)<-c("特征值E","貢獻率CR","累計貢獻率CCR")
pca.res=rbind(pc,xx)
write.csv(pca.res,file="04.PCA分析結果.csv")

#表型數據聚類分析
dist_mat <- dist(genotype.mean[,2:5], method = "euclidean")
clustering <- hclust(dist_mat, method = "complete")
plot(clustering,labels=genotype.mean$基因型,xlab="sample",ylab="Height" ,main="")
#根據進化樹分組
group.data=genotype.mean
group.data$group=as.factor(cutree(clustering,h=15))

names=c("基因型"   ,     "單株產量" ,     "淀粉含量"  , "干物質含量",
        "平均出苗率" ,"生育日數"  ,"株高"    ,      "group" )
colnames(group.data)=names
library("ggpubr")
phenotype=names[2:(length(names)-1)]
test.res=data.frame()
for(i in phenotype){
  f=as.formula(paste0("`",i,"`~group"))
  print(f)
  res=compare_means(f, data = group.data,method="t.test")
  print(res)
  test.res=rbind(test.res,res)
}
write.csv(test.res,file="聚類樹分組差異統計檢驗.csv")
write.csv(group.data,file="聚類樹分組信息表.csv")
g1.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=mean)
g2.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=sd)
write.csv(g1.res,file="聚類樹分組統計平均值.csv")
write.csv(g2.res,file="聚類樹分組統計標準差.csv")

#表型數據在不同環境下變異
for (i in traits){
  mydata1[,i]=as.double(mydata1[,i])
}
SD=round(apply(mydata1[,traits],2,sd),2)
MEAN=round(apply(mydata1[,traits],2,mean),2)
RANGE=round(apply(mydata1[,traits],2,range),2)
CV=round(SD/MEAN*100,2)
H=round(diversity(mydata1[,traits], index = "shannon", MARGIN = 2),2)
trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數（%）`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多樣性指數 H’`=H)
write.csv(trait.mean,file="01.馬鈴薯表型性狀統計(張北).csv")

#表型數據在不同環境下變異
for (i in traits){
  mydata2[,i]=as.double(mydata2[,i])
}
SD=round(apply(mydata2[,traits],2,sd),2)
MEAN=round(apply(mydata2[,traits],2,mean),2)
RANGE=round(apply(mydata2[,traits],2,range),2)
CV=round(SD/MEAN*100,2)
H=round(diversity(mydata2[,traits], index = "shannon", MARGIN = 2),2)
trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數（%）`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多樣性指數 H’`=H)
write.csv(trait.mean,file="01.馬鈴薯表型性狀統計(康保).csv")

######關聯分析
mydata$年度=as.factor(mydata$年度)
mydata$環境=as.factor(mydata$環境)
F.res=data.frame()
for (i in traits){
  print(i)
  fl=c(as.formula(paste0("`",i,"`~基因型")),
       as.formula(paste0("`",i,"`~環境")),
       as.formula(paste0("`",i,"`~年度")),
       as.formula(paste0("`",i,"`~基因型*環境")),
       as.formula(paste0("`",i,"`~基因型*年度"))
       #as.formula(paste0("`",i,"`~基因型*年度*環境"))
  )
  
  for(f in fl){ 
    blp=lm(f,data=mydata)
    aa=summary(blp)
    #
    #print(aa)
    print(aa$df)
    print(f)
    fstatistic = aa$fstatistic
    
    p_value = pf(as.numeric(fstatistic[1]), as.numeric(fstatistic[2]), as.numeric(fstatistic[3]), lower.tail = FALSE)
    print(p_value)
    aa=data.frame(`公式`=as.character(f)[3],df=aa$df[1],F=fstatistic[1],P=p_value,`性狀`=i)
    F.res=rbind(F.res,aa)
    
    
    # question1 <- readline("Would you like to proceed untill the loop ends? (Y/N)") 
    # if(regexpr(question1, 'y', ignore.case = TRUE) == 1){ 
    #   continue = TRUE 
    #   next 
    # } else{
    #   break
    # }
  }
  
}
write.csv(F.res,file="05.表型與基因型環境年度互作效應.csv")

第二版版本，注意PCA分析升級，輸入數據行為樣本，列為表型；

local({r <- getOption("repos")  
r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/"   
options(repos=r)}) 
options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")
#install.packages("GGEBiplots")
#install.packages("GGEBiplotGUI")
library(GGEBiplotGUI)
library("GGEBiplots")
library("reshape2")
library(vegan)
library(corrplot)
library("psych")
library(Cairo)
setwd("D:/potato/第二次分析")
mydata<-read.table("data.txt",header = T,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")


########################計算平均值

traits=colnames(mydata)
for (i in traits){
  mydata[,i]=as.double(mydata[,i])
}
SD=round(apply(mydata[,traits],2,sd),2)
MEAN=round(apply(mydata[,traits],2,mean),2)
RANGE=round(apply(mydata[,traits],2,range),2)
CV=round(SD/MEAN*100,2)
H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2)
J=round(H/log(specnumber(t(mydata[,traits]))),2)

trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數（%）`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="~"),`H’`=H,J=J)
write.csv(trait.mean,file="01.馬鈴薯表型性狀統計.csv")


########################分布范圍柱狀圖
#########################求平均


for(t in traits){
  
  ##t="頂小葉寬"
  #t="出苗率"
  d<-  mydata[,t]
  pdf(paste0(t,"-表型性狀值分布.pdf"),w=5,h=5,family="GB1")
  par(mar=c(6,4.1,2,4))
  
  MIN=min(d)
  MAX=max(d)
  step=(MAX-MIN)/(length(d)^(1/2)+1)
  
  xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step),
                plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="")
  
  xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step),
                plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="green",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="")
  
  mtext(t,side=1,line=4.5)
  axis(side=1,xhist$mids,labels=F)
  
  
  s=round(xhist$mids-step,2)
  if (s[1]<0) {s[1]=0}
  e=round(xhist$mids+step,2)
  lab=paste(s,e,sep="-")
  
  text(x=xhist$mids, y=-3, labels=lab,cex=1,  xpd=TRUE, srt=45,adj=1)
  axis(4, las=1,at=seq(from = 0, to = max(xhist$counts), length.out = 4), labels=paste0(round(seq(0, max(xhist$counts),length.out =4)/sum(xhist$counts),4)*100,"%"), col = 'red', col.axis = 'red')
  
  dd=density(d)
  par(new=T)
  plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "") 
  #mtext("頻率",side=4,line=3,col="red")
  box(bty="l")
  l.at="topright"
  if(t=="出苗率" || t=="葉緣形狀"){
    l.at="topleft"
  }
  legend(l.at,legend = c("直方圖"), 
         fill = c('green'),  
         bty='n')
  legend(l.at,legend = c("正態圖"), 
         col = c("red"), lty=1, 
         bty='n',inset = c(0,0.08), lwd=3)
  dev.off()
}
################相關性分析###################
#################################相關性分析
occor = corr.test(mydata,use="pairwise",method="spearman",adjust="BH",alpha=.05)
occor.r = occor$r  # 取相關性矩陣R值
occor.p = occor$p  # 取相關性矩陣p值
#設置色板
palette_2 <- colorRampPalette(c("yellow","red"))(100)
#繪圖
#png(file="traits_cor.png",w=10*300,h=10*300,res = 300)
pdf(file="traits_cor.pdf",w=10,h=10,family="GB1")
corrplot(occor.r, method = "square", type = "lower", order="AOE",
         col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r",
         title = "Traits corr", mar = c(2,8,4,1))
corrplot(occor.r, method = "number", type = "upper",order="AOE",
         col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n", 
         number.cex=1,mar = c(2,4,4,1))
dev.off()
write.csv(occor.p,file="02.相關性P值.csv")
write.csv(occor.r,file="03.相關性R值.csv")

###############因子分析###############
mydata.l=melt(mydata)
bt=bartlett.test(value~variable,data=mydata.l)
kmo=KMO(mydata)
sink("04.因子分析.txt")
bt
kmo
sink()


#################################表型主成分分析
#pca = prcomp(mydata, scale=TRUE)
pca = princomp(mydata, scores=TRUE, cor=TRUE)
#pca = prcomp(t(mydata), scale=TRUE)
fa1 = factanal(mydata, factor=2, rotation="varimax", scores="regression")
fa1
#write.csv(abs(fa1$loadings), "loadings.csv")
pdf(file="04.PCA分析特征根碎石圖.pdf",w=8,h=4,family="GB1")
screeplot(pca, type="line", main="Scree Plot") 
dev.off()

sink(file="04.PCA分析summary.txt")
summary(pca)
sink()
pc=loadings(pca)[1:21,]
pc=as.data.frame(pc)
write.csv(pc,file="04.PCA分析結果.csv")
library(RColorBrewer)
mycolor=c(brewer.pal(9, "Set1"),brewer.pal(8, "Set2"),brewer.pal(9, "Paired"))
pdf(file="04.PCA分析表型分布.pdf",w=7,h=6,family="GB1")
par(mar=c(5,4,4,10))
plot(x=pc[,1],y=pc[,2],cex=1.5,col=mycolor,pch=16,xlab="factor 1",ylab = "factor 2")
abline(h=0,lty=1)
abline(v=0,lty=1)
legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)
dev.off()

#表型數據聚類分析
dist_mat <- dist(mydata, method = "euclidean")
clustering <- hclust(dist_mat, method = "complete")
pdf(file="05.聚類圖.pdf",w=15,h=4,family="GB1")
plot(clustering,labels=rownames(mydata),cex=0.55,xlab="sample",ylab="Height" ,main="")
abline(h=40,col="red")
dev.off()
#根據聚類樹分組
group.data=mydata
group.data$group=as.factor(cutree(clustering,h=40))


###################分組PCA圖################
#install.packages("ggfortify")
library(ggfortify)
pdf(file="04.PCA分析表型樣本雙序圖.pdf",w=5,h=4,family="GB1")
#par(mar=c(5,4,4,10))
 #pcs=pca$scores
 #plot(x=pcs[,1],y=pcs[,2],cex=1.5,col=mycolor[group.data$group],pch=1,xlab="factor 1",ylab = "factor 2")
 #par(new=T)
 #plot(x=pc[,1],y=pc[,2],cex=1.5,col="blue",pch=16,xlab="factor 1",ylab = "factor 2")
#legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)

#legend("topright",legend =1:6 ,pch=16,col=mycolor[1:6],xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)
#biplot(pca)
g=autoplot(pca, data = group.data, colour = 'group',
         loadings = TRUE, loadings.colour = 'blue',
         loadings.label = TRUE, loadings.label.size = 3,size=2)#,frame = TRUE, frame.type = 'norm')
g=g+scale_color_manual(name="group",values =mycolor)+
  scale_fill_manual(name="group",values =mycolor)+
  theme_bw()+ theme(  
    panel.grid=element_blank(), 
    axis.text.x=element_text(colour="black"),
    axis.text.y=element_text(colour="black"),
    panel.border=element_rect(colour = "black"),
    legend.key = element_blank(),
    plot.title = element_text(hjust = 0.5))
g
dev.off()


library("ggpubr")
phenotype=traits
test.res=data.frame()
for(i in phenotype){
  f=as.formula(paste0("`",i,"`~group"))
  print(f)
  fit=aov(f,data=group.data)
  ss=summary(fit)
  res=as.data.frame(ss[[1]])
  res$`表型`=i
  rownames(res)=c("組間","組內")
  print(res)
  test.res=rbind(test.res,res)
}
colnames(test.res)=c("Df","平方和SS","均方MS","F","顯著性","性狀trait")
write.csv(test.res,file="05.聚類樹分組差異統計檢驗.csv")
write.csv(group.data,file="05.聚類樹分組信息表.csv")
g1.res=aggregate(group.data[,traits],by=list(group.data$group),FUN=mean)
Average=apply(group.data[,traits],2,mean)
Average=c("Group.1"="總體平均值 Average",Average)
g.mean=rbind(g1.res,t(as.data.frame(Average)))
write.csv(t(g.mean),file="06.聚類樹分組統計平均值.csv")

關于“R語言農藝性狀表型數據與環境關聯互作代碼分析”這篇文章的內容就介紹到這里，感謝各位的閱讀！相信大家對“R語言農藝性狀表型數據與環境關聯互作代碼分析”知識都有一定的了解，大家如果還想學習更多知識，歡迎關注億速云行業資訊頻道。

向AI問一下細節

91超碰碰碰碰久久久久久综合_超碰av人澡人澡人澡人澡人掠_国产黄大片在线观看画质优化_txt小说免费全本

R語言農藝性狀表型數據與環境關聯互作代碼分析

猜你喜歡

91超碰碰碰碰久久久久久综合_超碰av人澡人澡人澡人澡人掠_国产黄大片在线观看画质优化_txt小说免费全本

R語言農藝性狀表型數據與環境關聯互作代碼分析

猜你喜歡

最新資訊

相關推薦

相關標簽