############################################### #ESTADISTICA MULTIVARIADA COMPUTACIONAL - 2021# ############################################### ##################################### #Práctico Aprendizaje No Supervisado# ##################################### ########################### #1. Muldimensional Scaling# ########################### #1.1. Ejercicio 1 del práctico 4 distUru <- read.csv(file = "DistUruguay-2-euclideas.csv", header = TRUE, sep = ";", row.names = 1) rownames(distUru) <- c("Artigas", "Canelones", "Colonia", "Durazno", "Florida", "Fray Bentos", "Melo", "Mercedes", "Minas", "Montevideo", "Paysandú", "Maldonado", "Rivera", "Rocha", "Salto", "San José", "Tacuarembó", "Treinta y Tres", "Trinidad") colnames(distUru) <- c("Artigas", "Canelones", "Colonia", "Durazno", "Florida", "Fray Bentos", "Melo", "Mercedes", "Minas", "Montevideo", "Paysandú", "Maldonado", "Rivera", "Rocha", "Salto", "San José", "Tacuarembó", "Treinta y Tres", "Trinidad") mapaUru = cmdscale(distUru, eig=TRUE, k=2, add = FALSE) plot(x=mapaUru$points[,1],y=mapaUru$points[,2]) text(mapaUru$points[,1],y=mapaUru$points[,2],labels=rownames(distUru), cex = 0.6, pos = 4, col = "red") theetaDEG <- -65 theeta <- theetaDEG*pi/180 mapaUru$points <- mapaUru$points %*% matrix(c(cos(theeta), -sin(theeta), sin(theeta), cos(theeta)), byrow = TRUE, ncol = 2) mapaUru$points[,1] <- -mapaUru$points[,1] plot(x=mapaUru$points[,1],y=mapaUru$points[,2]) text(mapaUru$points[,1],y=mapaUru$points[,2],labels=rownames(distUru), cex = 0.6, pos = 4, col = "red") #Otra posibilidad para graficar mapaUruMDS <- data.frame(rownames(mapaUru$points), mapaUru$points[,1], mapaUru$points[,2]) rownames(mapaUruMDS) <- c("Artigas", "Canelones", "Colonia", "Durazno", "Florida", "Fray Bentos", "Melo", "Mercedes", "Minas", "Montevideo", "Paysandú", "Maldonado", "Rivera", "Rocha", "Salto", "San José", "Tacuarembó", "Treinta y Tres", "Trinidad") colnames(mapaUruMDS) <- c("Ciudad", "X", "Y") ggplot(mapaUruMDS, aes(X, Y)) + geom_text(aes(label = Ciudad), size=2.5, check_overlap = TRUE) + geom_point(color='blue', alpha=1/1.75) + xlim(-275, 275) + theme_gray() + coord_fixed() #1.2 Ejercicio 2 del práctico 4 library(FactoMineR) A=matrix(c(1,0,0,1,2,0,2,2,2,0,0,2),nrow=4,ncol=3,byrow = T) aa=PCA(A) aa$ind$coord aa$ind$coord[,1] sum(aa$ind$contrib[,1]) sum(1/4*(aa$ind$coord[,1])^2/aa$eig[1]) ##A mano AA=scale(A,center=T,scale=apply(A,2,sd)*sqrt((nrow(A)-1)/(nrow(A)))) AA%*%eigen(1/4*t(AA)%*%AA)$vector[,1] #Z1 primera componente, igual que en PCA ##Esc multidimensional de AA dis=dist(AA,diag=T,upper=T) unos=rep(1,4) H=diag(1,4)-unos%*%t(unos)/4 Q=-0.5*H%*%as.matrix(dis)^2%*%H eigen(Q) Y=eigen(Q)$vectors%*%diag(sqrt(eigen(Q)$values))#que coincide con la matriz de #componentes principales ?cmdscale model=cmdscale(d=dis) model model=cmdscale(d=dis,x.ret=T) model$x H%*%as.matrix(dis)^2%*%H #1.3 Ejercicio 5 del práctico 4 X=matrix(c(1,1,0,0,1,1, 1,1,1,0,0,1, 1,0,0,1,0,1, 1,0,0,1,0,1, 1,0,0,0,1,1, 0,0,0,0,1,0),6,6,byrow=T) library(ade4) #Matriz de distancia al cuadrado: D2= as.matrix(2* dist.binary(X,method=2,diag=T,upper=T)^2) P=diag(1,6)-1/6*rep(1,6)%*%t(rep(1,6)) Q=-0.5 * P%*%D2%*%P eigen (Q) nombres=c('leon', 'girafa', 'vaca','oveja', 'gato', 'hombre') plot(x=eigen(Q)$vector[,1],y=eigen(Q)$vector[,2],xlim=c(-1,1),ylim=c(-1,1),xlab='Y1',ylab='Y2',main=paste("A mano")) text(x=eigen(Q)$vector[,1],y=eigen(Q)$vector[,2], labels = nombres, cex = 0.6, pos = 4, col = "red") #Con la función mds par(mfrow=c(1,2)) fit=cmdscale(d=sqrt(D2),eig=TRUE, k=2,x.ret=T) plot(x=fit$points[,1],y=fit$points[,2],xlim=c(-1,1),ylim=c(-1,1),main=paste("Con funcion cmdscale")) text(x=eigen(Q)$vector[,1],y=eigen(Q)$vector[,2], labels = nombres, cex = 0.6, pos = 4, col = "red") Y=eigen(Q)$vector #elefante elefante=c(1,1,0,0,0,1) Lambda=diag(eigen(Q)$value) #Calculo la distancia del elefante a los demás individuos delefante=as.matrix(2*dist.binary(rbind(X,elefante),method=2,diag=T,upper=T)^2)[7,-7] vp=eigen(Q)$values for (i in length(vp)) { if (vp[i]<0) {vp[i] = 0} else{vp[i] = vp[i]} } iX=eigen(Q)$vectors%*%sqrt(diag(vp)) coordele=0.5*diag(1/(eigen(Q)$value))%*%t(iX)%*%(diag(Q)-delefante) plot(x=fit$points[,1],y=fit$points[,2],xlim=c(-1,1),ylim=c(-1,1),main=paste("Con funcion cmdscale")) text(x=eigen(Q)$vector[,1],y=eigen(Q)$vector[,2], labels = nombres, cex = 0.6, pos = 4, col = "red") points(x=coordele[1],y=coordele[2], col = 5, pch = 8, cex = 2) text(x=coordele[1],y=coordele[2], labels = paste("elefante"), cex = 0.6, pos = 4, col = 5) ################################################ #2.Variables e individuos suplementarios en ACP# ################################################ #2.1 individuos suplementarios library(ade4) X =as.matrix(tortues[,1:3]) data("tortues") desv= function(x){sqrt((1/length(x))*sum((x-mean(x))^2))} Xs=scale(X,center=T,scale=apply(X,2,FUN=desv)) S=1/nrow(X)*t(Xs)%*%Xs Vectprop=eigen(S)$vectors Z1=Xs%*%Vectprop[,1] Z2=Xs%*%Vectprop[,2] Xsup=matrix(c(158,104,57,163,104,62,161,108,63),3,3,byrow=T) Xsups=scale(Xsup,center=T,scale=apply(Xsup,2,FUN=desv)) Z1sup=Xsups%*%Vectprop[,1] Z2sup=Xsups%*%Vectprop[,2] color= c(1,2) names(color)=c("M","F") vectorcoloreado= color[tortues[,4]] plot(Z1,Z2, pch=15,col=vectorcoloreado, main = "Individuos suplementarios",xlab = "Comp1",ylab = "Comp2") abline(h=0,lty=1,lwd=2) abline(v=0,lty=1,lwd=2) points(Z1sup,Z2sup,pch=18,col='green') #2.2 Variable discreta suplementaria library(ade4) X =as.matrix(tortues[,1:3]) desv= function(x){sqrt((1/length(x))*sum((x-mean(x))^2))} Xs=scale(X,center=T,scale=apply(X,2,FUN=desv)) S=1/nrow(X)*t(Xs)%*%Xs Vectprop=eigen(S)$vectors Z1=Xs%*%Vectprop[,1] Z2=Xs%*%Vectprop[,2] color= c(1,2) names(color)=c("M","F") vectorcoloreado= color[tortues[,4]] plot(Z1,Z2, pch=15,col=vectorcoloreado, main = "Variable discreta suplementaria",xlab = "Comp1",ylab = "Comp2",asp = 1) abline(h=0,lty=1,lwd=2) abline(v=0,lty=1,lwd=2) Z12 <- matrix(c(Z1,Z2),nrow=48,ncol=2,byrow = F) points(colMeans(Z12[tortues[,4]=="F",])[1],colMeans(Z12[tortues[,4]=="F",])[2],col="black",pch=8,cex=1.5) points(colMeans(Z12[tortues[,4]=="M",])[1],colMeans(Z12[tortues[,4]=="M",])[2],col="black",pch=8,cex=1.5)