您好,登錄后才能下訂單哦!
這篇文章主要介紹了R語言農藝性狀表型數據與環境關聯互作代碼分析的相關知識,內容詳細易懂,操作簡單快捷,具有一定借鑒價值,相信大家閱讀完這篇R語言農藝性狀表型數據與環境關聯互作代碼分析文章都會有所收獲,下面我們一起來看看吧。
農藝性狀表型數據與環境關聯互作分析
local({r <- getOption("repos") r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/" options(repos=r)}) options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor") #install.packages("GGEBiplots") #install.packages("GGEBiplotGUI") library(GGEBiplotGUI) library("GGEBiplots") library("reshape2") library(vegan) library(corrplot) library("psych") library(Cairo) setwd("D:/potato/") data1<-read.table("張北data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8") data.t<-as.data.frame(t(data1)) data.l<-melt(data.t,id.vars = c('年度', '性狀'),variable.name = "基因型") mydata1<-dcast(data.l, `年度` + `基因型` ~ `性狀`) #`單株產量` + `淀粉含量(%)` + `干物質含量(%)` + `平均出苗率(%)`+ `生育日數(天)`+ `株高` data2<-read.table("康保data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8") data.t<-as.data.frame(t(data2)) data.l<-melt(data.t,id.vars = c('年度', '性狀'),variable.name = "基因型") mydata2<-dcast(data.l, `年度` + `基因型` ~ `性狀`) #`單株產量` + `淀粉含量(%)` + `干物質含量(%)` + `平均出苗率(%)`+ `生育日數(天)` + `株高` #inter<-intersect(colnames(mydata1),colnames(mydata2)) #去掉一些數據 inter<-c("年度" , "基因型" , "單株產量" , "淀粉含量(%)" , "干物質含量(%)" , "平均出苗率(%)" , "生育日數(天)" , "株高" ) mydata1.c<-mydata1[,inter] mydata2.c<-mydata2[,inter] mydata1.c$`環境`="張北" mydata2.c$`環境`="康保" mydata=rbind(mydata1.c,mydata2.c) ########################計算平均值 #traits<-c("單株產量" , "淀粉含量(%)" , "干物質含量(%)" , "平均出苗率(%)", "生育日數(天)" , "小區產量" ,"折合單位產量kg/hm", "株高") traits=c( "單株產量" , "淀粉含量(%)" , "干物質含量(%)" , "平均出苗率(%)" , "生育日數(天)" , "株高" ) for (i in traits){ mydata[,i]=as.double(mydata[,i]) } SD=round(apply(mydata[,traits],2,sd),2) MEAN=round(apply(mydata[,traits],2,mean),2) RANGE=round(apply(mydata[,traits],2,range),2) CV=round(SD/MEAN*100,2) H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2) trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數(%)`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多樣性指數 H’`=H) write.csv(trait.mean,file="01.馬鈴薯表型性狀統計.csv") #################################相關性分析 occor = corr.test(mydata[,traits],use="pairwise",method="spearman",adjust="BH",alpha=.05) occor.r = occor$r # 取相關性矩陣R值 occor.p = occor$p # 取相關性矩陣p值 #設置色板 palette_2 <- colorRampPalette(c("yellow","red"))(100) #繪圖 #png(file="traits_cor.png",w=10*300,h=10*300,res = 300) pdf(file="traits_cor.pdf",w=10,h=10,family="GB1") corrplot(occor.r, method = "square", type = "lower", order="AOE", col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r", title = "Traits corr", mar = c(2,4,4,1)) corrplot(occor.r, method = "number", type = "upper",order="AOE", col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n", number.cex=1,mar = c(2,4,4,1)) dev.off() write.csv(occor.p,file="02.相關性P值.csv") write.csv(occor.r,file="03.相關性R值.csv") ########################分布范圍柱狀圖 #########################求平均 genotype.mean=aggregate(mydata[,traits],by=list(`基因型`=mydata$`基因型`),mean) for(t in traits){ #t="平均出苗率(%)" d<- genotype.mean[,t] ff=gsub("\\(\\S+\\)", "", t) ff=gsub("kg/hm", "", ff) pdf(paste0(ff,"-表型性狀值分布.pdf"),w=5,h=5,family="GB1") par(mar=c(6,4.1,2,4)) xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10), plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="") xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10), plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="") mtext(t,side=1,line=4.5) axis(side=1,xhist$mids,labels=F) b=(xhist$mids[2]-xhist$mids[1])/2 s=round(xhist$mids-b,2) s[1]=0 e=round(xhist$mids+b,2) lab=paste(s,e,sep="-") text(x=xhist$mids, y=-3, labels=lab,cex=1, xpd=TRUE, srt=45,adj=1) dd=density(d) par(new=T) plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "") axis(4, las=1,at=seq(from = 0, to = max(xhist$density), length.out = 6), labels=round(seq(0, max(xhist$density),length.out =6)/sum(xhist$density),2), col = 'red', col.axis = 'red') mtext("頻率",side=4,line=3,col="red") box(bty="l") dev.off() } ##############GGEbioplots ggdata=mydata[,c("環境","基因型","單株產量")] ggdata1=aggregate(`單株產量` ~ 基因型 + 環境, data = mydata, mean) gg.f=dcast(ggdata1,基因型 ~環境) row.names(gg.f)=gg.f[,1] gg.f=gg.f[,-1]*10 #GGEBiplot(Data = gg.f) library(ggplot2) GGE1<-GGEModel(gg.f,centering = "tester", SVP = "symmetrical", scaling = "none") g1=GGEPlot(GGE1,type=6,sizeGen=4,sizeEnv = 2,largeSize=3) g1=g1+ labs(title = "") pdf(paste0("適應性分析bioplot.pdf"),w=5,h=5,family="GB1") print(g1) dev.off() g2=GGEPlot(GGE1,type=9,sizeGen=2,sizeEnv = 3,largeSize=3) g2=g2+ labs(title = "") pdf(paste0("豐產性-穩定性分析bioplot.pdf"),w=5,h=5,family="GB1") print(g2) dev.off() #################################表型主成分分析 p=genotype.mean row.names(p)=p[,1] p=p[,-1] pca = prcomp(t(p), scale=TRUE) #pca = prcomp(t(decostand(p, "hellinger")), scale=TRUE) ss=summary(pca) pc=pca$x xx=ss$importance row.names(xx)<-c("特征值E","貢獻率CR","累計貢獻率CCR") pca.res=rbind(pc,xx) write.csv(pca.res,file="04.PCA分析結果.csv") #表型數據聚類分析 dist_mat <- dist(genotype.mean[,2:5], method = "euclidean") clustering <- hclust(dist_mat, method = "complete") plot(clustering,labels=genotype.mean$基因型,xlab="sample",ylab="Height" ,main="") #根據進化樹分組 group.data=genotype.mean group.data$group=as.factor(cutree(clustering,h=15)) names=c("基因型" , "單株產量" , "淀粉含量" , "干物質含量", "平均出苗率" ,"生育日數" ,"株高" , "group" ) colnames(group.data)=names library("ggpubr") phenotype=names[2:(length(names)-1)] test.res=data.frame() for(i in phenotype){ f=as.formula(paste0("`",i,"`~group")) print(f) res=compare_means(f, data = group.data,method="t.test") print(res) test.res=rbind(test.res,res) } write.csv(test.res,file="聚類樹分組差異統計檢驗.csv") write.csv(group.data,file="聚類樹分組信息表.csv") g1.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=mean) g2.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=sd) write.csv(g1.res,file="聚類樹分組統計平均值.csv") write.csv(g2.res,file="聚類樹分組統計標準差.csv") #表型數據在不同環境下變異 for (i in traits){ mydata1[,i]=as.double(mydata1[,i]) } SD=round(apply(mydata1[,traits],2,sd),2) MEAN=round(apply(mydata1[,traits],2,mean),2) RANGE=round(apply(mydata1[,traits],2,range),2) CV=round(SD/MEAN*100,2) H=round(diversity(mydata1[,traits], index = "shannon", MARGIN = 2),2) trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數(%)`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多樣性指數 H’`=H) write.csv(trait.mean,file="01.馬鈴薯表型性狀統計(張北).csv") #表型數據在不同環境下變異 for (i in traits){ mydata2[,i]=as.double(mydata2[,i]) } SD=round(apply(mydata2[,traits],2,sd),2) MEAN=round(apply(mydata2[,traits],2,mean),2) RANGE=round(apply(mydata2[,traits],2,range),2) CV=round(SD/MEAN*100,2) H=round(diversity(mydata2[,traits], index = "shannon", MARGIN = 2),2) trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數(%)`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多樣性指數 H’`=H) write.csv(trait.mean,file="01.馬鈴薯表型性狀統計(康保).csv") ######關聯分析 mydata$年度=as.factor(mydata$年度) mydata$環境=as.factor(mydata$環境) F.res=data.frame() for (i in traits){ print(i) fl=c(as.formula(paste0("`",i,"`~基因型")), as.formula(paste0("`",i,"`~環境")), as.formula(paste0("`",i,"`~年度")), as.formula(paste0("`",i,"`~基因型*環境")), as.formula(paste0("`",i,"`~基因型*年度")) #as.formula(paste0("`",i,"`~基因型*年度*環境")) ) for(f in fl){ blp=lm(f,data=mydata) aa=summary(blp) # #print(aa) print(aa$df) print(f) fstatistic = aa$fstatistic p_value = pf(as.numeric(fstatistic[1]), as.numeric(fstatistic[2]), as.numeric(fstatistic[3]), lower.tail = FALSE) print(p_value) aa=data.frame(`公式`=as.character(f)[3],df=aa$df[1],F=fstatistic[1],P=p_value,`性狀`=i) F.res=rbind(F.res,aa) # question1 <- readline("Would you like to proceed untill the loop ends? (Y/N)") # if(regexpr(question1, 'y', ignore.case = TRUE) == 1){ # continue = TRUE # next # } else{ # break # } } } write.csv(F.res,file="05.表型與基因型環境年度互作效應.csv")
第二版版本,注意PCA分析升級,輸入數據行為樣本,列為表型;
local({r <- getOption("repos") r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/" options(repos=r)}) options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor") #install.packages("GGEBiplots") #install.packages("GGEBiplotGUI") library(GGEBiplotGUI) library("GGEBiplots") library("reshape2") library(vegan) library(corrplot) library("psych") library(Cairo) setwd("D:/potato/第二次分析") mydata<-read.table("data.txt",header = T,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8") ########################計算平均值 traits=colnames(mydata) for (i in traits){ mydata[,i]=as.double(mydata[,i]) } SD=round(apply(mydata[,traits],2,sd),2) MEAN=round(apply(mydata[,traits],2,mean),2) RANGE=round(apply(mydata[,traits],2,range),2) CV=round(SD/MEAN*100,2) H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2) J=round(H/log(specnumber(t(mydata[,traits]))),2) trait.mean=cbind(`平均值`=MEAN,`標準差`=SD,`變異系數(%)`=CV,`變幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="~"),`H’`=H,J=J) write.csv(trait.mean,file="01.馬鈴薯表型性狀統計.csv") ########################分布范圍柱狀圖 #########################求平均 for(t in traits){ ##t="頂小葉寬" #t="出苗率" d<- mydata[,t] pdf(paste0(t,"-表型性狀值分布.pdf"),w=5,h=5,family="GB1") par(mar=c(6,4.1,2,4)) MIN=min(d) MAX=max(d) step=(MAX-MIN)/(length(d)^(1/2)+1) xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step), plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="") xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step), plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="green",xaxt="n", yaxs="i",fill=NULL,ylab = "樣本數",freq=T,main="") mtext(t,side=1,line=4.5) axis(side=1,xhist$mids,labels=F) s=round(xhist$mids-step,2) if (s[1]<0) {s[1]=0} e=round(xhist$mids+step,2) lab=paste(s,e,sep="-") text(x=xhist$mids, y=-3, labels=lab,cex=1, xpd=TRUE, srt=45,adj=1) axis(4, las=1,at=seq(from = 0, to = max(xhist$counts), length.out = 4), labels=paste0(round(seq(0, max(xhist$counts),length.out =4)/sum(xhist$counts),4)*100,"%"), col = 'red', col.axis = 'red') dd=density(d) par(new=T) plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "") #mtext("頻率",side=4,line=3,col="red") box(bty="l") l.at="topright" if(t=="出苗率" || t=="葉緣形狀"){ l.at="topleft" } legend(l.at,legend = c("直方圖"), fill = c('green'), bty='n') legend(l.at,legend = c("正態圖"), col = c("red"), lty=1, bty='n',inset = c(0,0.08), lwd=3) dev.off() } ################相關性分析################### #################################相關性分析 occor = corr.test(mydata,use="pairwise",method="spearman",adjust="BH",alpha=.05) occor.r = occor$r # 取相關性矩陣R值 occor.p = occor$p # 取相關性矩陣p值 #設置色板 palette_2 <- colorRampPalette(c("yellow","red"))(100) #繪圖 #png(file="traits_cor.png",w=10*300,h=10*300,res = 300) pdf(file="traits_cor.pdf",w=10,h=10,family="GB1") corrplot(occor.r, method = "square", type = "lower", order="AOE", col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r", title = "Traits corr", mar = c(2,8,4,1)) corrplot(occor.r, method = "number", type = "upper",order="AOE", col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n", number.cex=1,mar = c(2,4,4,1)) dev.off() write.csv(occor.p,file="02.相關性P值.csv") write.csv(occor.r,file="03.相關性R值.csv") ###############因子分析############### mydata.l=melt(mydata) bt=bartlett.test(value~variable,data=mydata.l) kmo=KMO(mydata) sink("04.因子分析.txt") bt kmo sink() #################################表型主成分分析 #pca = prcomp(mydata, scale=TRUE) pca = princomp(mydata, scores=TRUE, cor=TRUE) #pca = prcomp(t(mydata), scale=TRUE) fa1 = factanal(mydata, factor=2, rotation="varimax", scores="regression") fa1 #write.csv(abs(fa1$loadings), "loadings.csv") pdf(file="04.PCA分析特征根碎石圖.pdf",w=8,h=4,family="GB1") screeplot(pca, type="line", main="Scree Plot") dev.off() sink(file="04.PCA分析summary.txt") summary(pca) sink() pc=loadings(pca)[1:21,] pc=as.data.frame(pc) write.csv(pc,file="04.PCA分析結果.csv") library(RColorBrewer) mycolor=c(brewer.pal(9, "Set1"),brewer.pal(8, "Set2"),brewer.pal(9, "Paired")) pdf(file="04.PCA分析表型分布.pdf",w=7,h=6,family="GB1") par(mar=c(5,4,4,10)) plot(x=pc[,1],y=pc[,2],cex=1.5,col=mycolor,pch=16,xlab="factor 1",ylab = "factor 2") abline(h=0,lty=1) abline(v=0,lty=1) legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1) dev.off() #表型數據聚類分析 dist_mat <- dist(mydata, method = "euclidean") clustering <- hclust(dist_mat, method = "complete") pdf(file="05.聚類圖.pdf",w=15,h=4,family="GB1") plot(clustering,labels=rownames(mydata),cex=0.55,xlab="sample",ylab="Height" ,main="") abline(h=40,col="red") dev.off() #根據聚類樹分組 group.data=mydata group.data$group=as.factor(cutree(clustering,h=40)) ###################分組PCA圖################ #install.packages("ggfortify") library(ggfortify) pdf(file="04.PCA分析表型樣本雙序圖.pdf",w=5,h=4,family="GB1") #par(mar=c(5,4,4,10)) #pcs=pca$scores #plot(x=pcs[,1],y=pcs[,2],cex=1.5,col=mycolor[group.data$group],pch=1,xlab="factor 1",ylab = "factor 2") #par(new=T) #plot(x=pc[,1],y=pc[,2],cex=1.5,col="blue",pch=16,xlab="factor 1",ylab = "factor 2") #legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1) #legend("topright",legend =1:6 ,pch=16,col=mycolor[1:6],xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1) #biplot(pca) g=autoplot(pca, data = group.data, colour = 'group', loadings = TRUE, loadings.colour = 'blue', loadings.label = TRUE, loadings.label.size = 3,size=2)#,frame = TRUE, frame.type = 'norm') g=g+scale_color_manual(name="group",values =mycolor)+ scale_fill_manual(name="group",values =mycolor)+ theme_bw()+ theme( panel.grid=element_blank(), axis.text.x=element_text(colour="black"), axis.text.y=element_text(colour="black"), panel.border=element_rect(colour = "black"), legend.key = element_blank(), plot.title = element_text(hjust = 0.5)) g dev.off() library("ggpubr") phenotype=traits test.res=data.frame() for(i in phenotype){ f=as.formula(paste0("`",i,"`~group")) print(f) fit=aov(f,data=group.data) ss=summary(fit) res=as.data.frame(ss[[1]]) res$`表型`=i rownames(res)=c("組間","組內") print(res) test.res=rbind(test.res,res) } colnames(test.res)=c("Df","平方和SS","均方MS","F","顯著性","性狀trait") write.csv(test.res,file="05.聚類樹分組差異統計檢驗.csv") write.csv(group.data,file="05.聚類樹分組信息表.csv") g1.res=aggregate(group.data[,traits],by=list(group.data$group),FUN=mean) Average=apply(group.data[,traits],2,mean) Average=c("Group.1"="總體平均值 Average",Average) g.mean=rbind(g1.res,t(as.data.frame(Average))) write.csv(t(g.mean),file="06.聚類樹分組統計平均值.csv")
關于“R語言農藝性狀表型數據與環境關聯互作代碼分析”這篇文章的內容就介紹到這里,感謝各位的閱讀!相信大家對“R語言農藝性狀表型數據與環境關聯互作代碼分析”知識都有一定的了解,大家如果還想學習更多知識,歡迎關注億速云行業資訊頻道。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。