##########################
# TP Clasificacion binaria (binary classification practical)
##########################

#######################
# Codigo transparencias (code from the slides)
#######################

# Simulate class membership: n draws from {1, 2} with prior pi1 for class 1.
n <- 1000
pi1 <- 0.5
population <- sample(1:2, n, rep = TRUE, prob = c(pi1, 1 - pi1))
table(population)
n1 <- table(population)[1]
n2 <- table(population)[2]

# Class-conditional model: X | G = k ~ N(mu.k, sigma.k^2).
mu.1 <- 2.5; sigma.1 <- 1
mu.2 <- 7;   sigma.2 <- 2
x1 <- rnorm(n1, mu.1, sigma.1)
x2 <- rnorm(n2, mu.2, sigma.2)
x12 <- c(x1, x2)
mean(x1); mean(x2); mean(x12)
sd(x1); sd(x2); sd(x12)

# Per-class histograms with the theoretical densities overlaid.
par(mfrow = c(1, 2))
hist(x1, freq = FALSE)
curve(dnorm(x, mean = mu.1, sd = sigma.1), add = TRUE, lwd = 2, col = 'red')
rug(x1, col = 'red')
hist(x2, freq = FALSE)
curve(dnorm(x, mean = mu.2, sd = sigma.2), add = TRUE, lwd = 2, col = 'blue')
rug(x2, col = 'blue')
# NOTE(review): add = TRUE overlays the pooled histogram on the class-2
# panel rather than opening a third panel — confirm this is intended.
hist(x12, freq = FALSE, add = TRUE)

# Marginal densities of both classes on a common axis.
curve(dnorm(x, mean = mu.1, sd = sigma.1), xlim = c(-3.5, 10),
      ylim = c(0, 0.5), lty = 1, col = 'red', ylab = 'densities',
      main = 'marginales, boundary')
curve(dnorm(x, mean = mu.2, sd = sigma.2), lty = 1, add = TRUE, col = 'blue')

# Mixture density g(x) = pi * f1(x) + (1 - pi) * f2(x).
# `pi` and `mu`/`sigma` (length-2 vectors) parameterize the two components.
g.mixture <- function(x, pi, mu, sigma) {
  pi * dnorm(x, mu[1], sigma[1]) + (1 - pi) * dnorm(x, mu[2], sigma[2])
}
curve(g.mixture(x, pi = pi1, c(mu.1, mu.2), c(sigma.1, sigma.2)),
      lwd = 2, add = TRUE)

# Decision boundary for equal priors: zero of the density ratio f1/f2 - 1.
boundary <- function(x) {
  dnorm(x, mu.1, sigma.1) / dnorm(x, mu.2, sigma.2) - 1
}
curve(boundary(x), lty = 1, add = TRUE, col = 'black')

library(rootSolve)  # required by the function uniroot.all
raices <- uniroot.all(boundary, c(-100, 100))

# Boundary when the density ratio must exceed 2 (unequal priors/costs).
boundary <- function(x) {
  dnorm(x, mu.1, sigma.1) / dnorm(x, mu.2, sigma.2) - 2
}
raices <- uniroot.all(boundary, c(-1000, 1000))

# Classify a handful of test points with the Bayes rule:
# assign to group 1 when pi1 * f1(x) > (1 - pi1) * f2(x).
x.test <- sample(c(0, 1, 3.5, 8, 10, 12))
clasificacion <- ifelse(pi1 * dnorm(x.test, mu.1, sigma.1) >
                          (1 - pi1) * dnorm(x.test, mu.2, sigma.2),
                        "Group 1", "Group 2")
cbind(x.test, poblacion = clasificacion)

###############################################
# Ejercicio 1 del practico Binary Clasification
###############################################
library(e1071)
?naiveBayes

# Exercise 1: toy data with two categorical predictors (X1, X2) and a
# class label X3 in {1, -1}. Note the matrix is character, so data.frame
# gives columns named X1, X2, X3 used in the formula below.
data <- matrix(c('a', 'b',  1,
                 'a', 'a',  1,
                 'b', 'a',  1,
                 'a', 'a',  1,
                 'a', 'a', -1,
                 'b', 'b', -1,
                 'b', 'b', -1,
                 'b', 'b', -1),
               nrow = 8, ncol = 3, byrow = TRUE)
data
data <- data.frame(data)
data

# Fit the naive Bayes classifier and score the four possible
# feature combinations, both as labels and as posterior probabilities.
nb <- naiveBayes(X3 ~ X1 + X2, data = data)
test <- matrix(c('a', 'a',
                 'a', 'b',
                 'b', 'a',
                 'b', 'b'),
               ncol = 2, nrow = 4, byrow = TRUE)
test <- data.frame(test)
predict(nb, newdata = test)
predict(nb, newdata = test, type = 'raw')

###############################################
# Ejercicio 2 del practico Binary Clasification
###############################################
library(MASS)
ntrain <- 200  # half of training sample size
ntest <- 50    # half of test sample size

## define the model parameters
mean1 <- c(3, 1)
mean2 <- c(1, 3)
Sig <- matrix(c(1, 0, 0, 1), nrow = 2)  # identity covariance for both classes

## generate the training data: Class 1 = green, Class 2 = red
seed_train <- 1234
set.seed(seed_train)
train1 <- rbind(mvrnorm(ntrain, mean1, Sig), mvrnorm(ntrain, mean2, Sig))
ytrain <- c(rep(1, ntrain), rep(0, ntrain))
plot(train1[1:ntrain, ], xlab = "X1", ylab = "X2",
     xlim = range(train1[, 1]), ylim = range(train1[, 2]),
     col = "green", main = "Training Data")
points(train1[(ntrain + 1):(2 * ntrain), ], col = "red")

## Compute the training error for the Bayes rule (boundary X1 = X2,
## which is optimal here since the class means are mirrored).
predtrain <- 1 * (train1[, 1] > train1[, 2])
baytrainerr1 <- mean(predtrain != ytrain)

## Linear model: regress the 0/1 class indicator on the two features.
linfit1 <- lm(ytrain ~ train1)
# coef() avoids the $coef partial-matching of $coefficients.
inter1 <- coef(linfit1)[1]
slope1 <- coef(linfit1)[-1]

## training error of the linear rule: classify 1 when the fitted
## value exceeds 0.5.
linpredtrain1 <- as.vector(1 * ((inter1 + train1 %*% slope1) > 0.5))
lintrainerr1 <- mean(linpredtrain1 != ytrain)
plot(train1[1:ntrain, ], xlab = "X1", ylab = "X2",
     xlim = range(train1[, 1]), ylim = range(train1[, 2]),
     col = "green", main = "Scenario 1: Training Data")
points(train1[(ntrain + 1):(2 * ntrain), ], col = "red")
# Solid line: Bayes boundary X2 = X1. Dashed line: fitted linear boundary,
# i.e. the set where inter1 + x %*% slope1 = 0.5 solved for X2.
abline(coef = c(0, 1), lty = 1, lwd = 3)
abline(coef = c((inter1 - 0.5) / (-slope1[2]), slope1[1] / (-slope1[2])),
       lty = 2, lwd = 3)

## generate the test data: Class 1 = green, Class 2 = red
seed_test <- 77
set.seed(seed_test)
test1 <- rbind(mvrnorm(ntest, mean1, Sig), mvrnorm(ntest, mean2, Sig))
ytest <- c(rep(1, ntest), rep(0, ntest))
predtest <- 1 * (test1[, 1] > test1[, 2])
baytesterr1 <- mean(predtest != ytest)
# Report the Bayes-rule errors on training and test data.
print(baytrainerr1)
print(baytesterr1)

# Test error of the fitted linear classifier (threshold fitted value at 0.5),
# using the coefficients estimated on the training data.
predlm <- as.vector(1 * ((inter1 + test1 %*% slope1) > 0.5))
predlmerr <- mean(predlm != ytest)
predlmerr

# Plot the test data with both decision boundaries:
# solid = Bayes boundary X2 = X1, dashed = fitted linear boundary.
plot(test1[1:ntest, ], xlab = "X1", ylab = "X2",
     xlim = range(test1[, 1]), ylim = range(test1[, 2]),
     col = "green", main = "Scenario 1: Test Data")
points(test1[(ntest + 1):(2 * ntest), ], col = "red")
abline(coef = c(0, 1), lty = 1, lwd = 3)
abline(coef = c((inter1 - 0.5) / (-slope1[2]), slope1[1] / (-slope1[2])),
       lty = 2, lwd = 3)