###################################################
# Discriminating phoneme curves:
# R/S-plus commandlines
###################################################
#
# funopadi.knn.lcv() is not defined in this script and must already be
# available in the session. No random seed is set here: call set.seed() once
# before running the script if reproducible samples are needed.

##
# Helpers
#########

# Draw, for every class, `size` learning rows and then `size` testing rows
# taken among the remaining rows of the same class.
#   rows.by.class: named list, one vector of row indices per class
#   size:          number of rows to draw per class for each sample
# Returns list(learn = , test = ), two lists named like `rows.by.class`.
draw.phoneme.split <- function(rows.by.class, size = 50) {
  learn <- lapply(rows.by.class, sample, size = size)
  # setdiff() keeps every unused row of the class eligible, including its
  # first row (a strict "index > first row" filter would drop that row).
  test <- Map(
    function(rows, used) sample(setdiff(rows, used), size),
    rows.by.class, learn
  )
  list(learn = learn, test = test)
}

# Run funopadi.knn.lcv() for one semi-metric setting.
#   classes:    class numbers of the learning curves
#   learn/test: matrices of learning / testing curves
#   semimetric: "mplsr", "pca" or "deriv"
#   q:          first semi-metric argument (the number in the setting label)
#   nknot:      second positional argument, only passed for "deriv"
#               (presumably a number of spline knots - TODO confirm)
# Returns whatever funopadi.knn.lcv() returns.
classify.phonemes <- function(classes, learn, test, semimetric, q, nknot) {
  if (semimetric == "deriv") {
    funopadi.knn.lcv(classes, learn, test, q, nknot, c(0, 1),
                     kind.of.kernel = "quadratic", semimetric = semimetric)
  } else {
    funopadi.knn.lcv(classes, learn, test, q,
                     kind.of.kernel = "quadratic", semimetric = semimetric)
  }
}

# Proportion of predicted class numbers that differ from the true ones.
misclas.rate <- function(res, classes) {
  mean(res$Predicted.classnumber != classes)
}

##
# Entering phoneme data (5 groups: "sh"  <--> 1,
#                                  "iy"  <--> 2,
#                                  "dcl" <--> 3,
#                                  "aa"  <--> 4,
#                                  "ao"  <--> 5)
####################################################################
PHONDAT <- as.matrix(read.table("npfda-phoneme.dat"))
dimnames(PHONDAT) <- NULL
PHONCURVES <- PHONDAT[, 1:150]  # sample of curves

# Rows of PHONCURVES used for each group (400 consecutive rows per group)
Class.rows <- list(
  sh  = 1:400,
  iy  = 401:800,
  dcl = 801:1200,
  aa  = 1201:1600,
  ao  = 1601:2000
)

# Semi-metric settings compared below: label, kind and first argument
Semimetric.labels <- c("mplsr5", "mplsr6", "mplsr7", "mplsr8", "mplsr9",
                       "pca4", "pca5", "pca6", "pca7", "pca8", "deriv0")
Semimetric.kind <- rep(c("mplsr", "pca", "deriv"), c(5, 5, 1))
Semimetric.q <- c(5:9, 4:8, 0)

##
# Building one learning sample and one testing sample
#####################################################
Split <- draw.phoneme.split(Class.rows, 50)
Learn.sh  <- Split$learn[["sh"]]
Learn.iy  <- Split$learn[["iy"]]
Learn.dcl <- Split$learn[["dcl"]]
Learn.aa  <- Split$learn[["aa"]]
Learn.ao  <- Split$learn[["ao"]]
Test.sh  <- Split$test[["sh"]]
Test.iy  <- Split$test[["iy"]]
Test.dcl <- Split$test[["dcl"]]
Test.aa  <- Split$test[["aa"]]
Test.ao  <- Split$test[["ao"]]
Learning <- c(Learn.sh, Learn.iy, Learn.dcl, Learn.aa, Learn.ao)
Testing <- c(Test.sh, Test.iy, Test.dcl, Test.aa, Test.ao)
PHONLEARN <- PHONCURVES[Learning, ]  # learning sample of curves
PHONTEST <- PHONCURVES[Testing, ]    # testing sample of curves
Classlearn <- rep(1:5, each = 50)    # learning class numbers
Classtest <- rep(1:5, each = 50)     # testing class numbers

##
# Computing predicted class membership and
# misclassification rates (for various semi-metrics)
####################################################
# NOTE(review): this single run passes 20 as the second "deriv" argument,
# whereas the 50-run study below passes 30. Both values are kept as found;
# confirm which one is intended.
Res <- vector("list", length(Semimetric.labels))
names(Res) <- Semimetric.labels
for (j in seq_along(Semimetric.labels)) {
  Res[[j]] <- classify.phonemes(Classlearn, PHONLEARN, PHONTEST,
                                Semimetric.kind[j], Semimetric.q[j],
                                nknot = 20)
}
Rates <- vapply(Res, misclas.rate, numeric(1), classes = Classtest)

res.mplsr5 <- Res[["mplsr5"]]
res.mplsr6 <- Res[["mplsr6"]]
res.mplsr7 <- Res[["mplsr7"]]
res.mplsr8 <- Res[["mplsr8"]]
res.mplsr9 <- Res[["mplsr9"]]
res.pca4 <- Res[["pca4"]]
res.pca5 <- Res[["pca5"]]
res.pca6 <- Res[["pca6"]]
res.pca7 <- Res[["pca7"]]
res.pca8 <- Res[["pca8"]]
res.deriv0 <- Res[["deriv0"]]

Misclas.mplsr5 <- Rates[["mplsr5"]]
Misclas.mplsr6 <- Rates[["mplsr6"]]
Misclas.mplsr7 <- Rates[["mplsr7"]]
Misclas.mplsr8 <- Rates[["mplsr8"]]
Misclas.mplsr9 <- Rates[["mplsr9"]]
Misclas.pca4 <- Rates[["pca4"]]
Misclas.pca5 <- Rates[["pca5"]]
Misclas.pca6 <- Rates[["pca6"]]
Misclas.pca7 <- Rates[["pca7"]]
Misclas.pca8 <- Rates[["pca8"]]
Misclas.deriv0 <- Rates[["deriv0"]]

##
# Plotting misclassification rates
##################################
Misclas.rates <- c(Misclas.mplsr5, Misclas.mplsr6, Misclas.mplsr7,
                   Misclas.mplsr8, Misclas.mplsr9, Misclas.pca4,
                   Misclas.pca5, Misclas.pca6, Misclas.pca7,
                   Misclas.pca8, Misclas.deriv0)
Misclas.names <- Semimetric.labels
dotchart(Misclas.rates, Misclas.names, cex = 1,
         xlab = "MISCLASSIFICATION RATES")

##
# Computing misclassification rates over 50 runs
# (i.e. 50 times the previous computation)
################################################
N.runs <- 50
# One row per run, one column per semi-metric setting (filled in the loop)
Misclas.runs <- matrix(NA_real_, nrow = N.runs,
                       ncol = length(Semimetric.labels),
                       dimnames = list(NULL, Semimetric.labels))
for (i in seq_len(N.runs)) {
  # The generator is deliberately NOT reseeded here: reseeding every run from
  # a seed drawn in 0:1000 makes repeated seeds, hence duplicated
  # learning/testing splits, likely within 50 runs.
  Split <- draw.phoneme.split(Class.rows, 50)
  Learn.sh  <- Split$learn[["sh"]]
  Learn.iy  <- Split$learn[["iy"]]
  Learn.dcl <- Split$learn[["dcl"]]
  Learn.aa  <- Split$learn[["aa"]]
  Learn.ao  <- Split$learn[["ao"]]
  Test.sh  <- Split$test[["sh"]]
  Test.iy  <- Split$test[["iy"]]
  Test.dcl <- Split$test[["dcl"]]
  Test.aa  <- Split$test[["aa"]]
  Test.ao  <- Split$test[["ao"]]
  Learning <- c(Learn.sh, Learn.iy, Learn.dcl, Learn.aa, Learn.ao)
  Testing <- c(Test.sh, Test.iy, Test.dcl, Test.aa, Test.ao)
  PHONLEARN <- PHONCURVES[Learning, ]
  PHONTEST <- PHONCURVES[Testing, ]
  for (j in seq_along(Semimetric.labels)) {
    res <- classify.phonemes(Classlearn, PHONLEARN, PHONTEST,
                             Semimetric.kind[j], Semimetric.q[j],
                             nknot = 30)
    Misclas.runs[i, j] <- misclas.rate(res, Classtest)
  }
}

# Per-setting vectors of the 50 misclassification rates
Misclas.of.phon.over.50.samples.with.pca4 <- Misclas.runs[, "pca4"]
Misclas.of.phon.over.50.samples.with.pca5 <- Misclas.runs[, "pca5"]
Misclas.of.phon.over.50.samples.with.pca6 <- Misclas.runs[, "pca6"]
Misclas.of.phon.over.50.samples.with.pca7 <- Misclas.runs[, "pca7"]
Misclas.of.phon.over.50.samples.with.pca8 <- Misclas.runs[, "pca8"]
Misclas.of.phon.over.50.samples.with.mplsr5 <- Misclas.runs[, "mplsr5"]
Misclas.of.phon.over.50.samples.with.mplsr6 <- Misclas.runs[, "mplsr6"]
Misclas.of.phon.over.50.samples.with.mplsr7 <- Misclas.runs[, "mplsr7"]
Misclas.of.phon.over.50.samples.with.mplsr8 <- Misclas.runs[, "mplsr8"]
Misclas.of.phon.over.50.samples.with.mplsr9 <- Misclas.runs[, "mplsr9"]
Misclas.of.phon.over.50.samples.with.deriv0 <- Misclas.runs[, "deriv0"]

# From here on the Misclas.* names hold the 50-run vectors, replacing the
# single-run scalars computed earlier.
Misclas.mplsr5 <- Misclas.of.phon.over.50.samples.with.mplsr5
Misclas.mplsr6 <- Misclas.of.phon.over.50.samples.with.mplsr6
Misclas.mplsr7 <- Misclas.of.phon.over.50.samples.with.mplsr7
Misclas.mplsr8 <- Misclas.of.phon.over.50.samples.with.mplsr8
Misclas.mplsr9 <- Misclas.of.phon.over.50.samples.with.mplsr9
Misclas.pca4 <- Misclas.of.phon.over.50.samples.with.pca4
Misclas.pca5 <- Misclas.of.phon.over.50.samples.with.pca5
Misclas.pca6 <- Misclas.of.phon.over.50.samples.with.pca6
Misclas.pca7 <- Misclas.of.phon.over.50.samples.with.pca7
Misclas.pca8 <- Misclas.of.phon.over.50.samples.with.pca8
Misclas.deriv0 <- Misclas.of.phon.over.50.samples.with.deriv0

##
# Plotting misclassification rates over 50 runs
################################################
# NOTE(review): the box labels read "plsr*" while the dotchart above uses
# "mplsr*"; kept as found (possibly shortened on purpose) - confirm.
Misclas.names <- c("plsr5", "plsr6", "plsr7", "plsr8", "plsr9",
                   "pca4", "pca5", "pca6", "pca7", "pca8", "deriv0")
boxplot(Misclas.mplsr5, Misclas.mplsr6, Misclas.mplsr7, Misclas.mplsr8,
        Misclas.mplsr9, Misclas.pca4, Misclas.pca5, Misclas.pca6,
        Misclas.pca7, Misclas.pca8, Misclas.deriv0,
        names = Misclas.names, xlab = "SEMI-METRICS",
        ylab = "MISCLASSIFICATION RATES", cex = 0.8)