#############################################################################
#############################################################################
#############################################################################
###                                                               ###########
###                 R Commandlines for implementing               ########### 
###              analyses given in our paper entitled             ###########
###  Most predictive design points for functional data predictors ###########
###  co-authored by Frederic Ferraty, Peter Hall, Philippe Vieu   ###########
###                     (Biometrika - 2010 - 97, 807-824)         ###########
###                                                               ###########
#############################################################################
#############################################################################
#############################################################################

###################################################################
### Downloading Most Predictive Design Points (mpdp) R routines ###
###################################################################
file("http://www.lsp.ups-tlse.fr/staph/npfda/mpdp-routinesR.txt",open="r")
source("http://www.lsp.ups-tlse.fr/staph/npfda/mpdp-routinesR.txt")
### From now on, the R routines "mpdp" and "predict.mpdp" can be used ###
 
In order to compare our stepwise algorithm with alternative methods, 
follow these instructions: 
#############################################################################
### Downloading NonParametric Functional Data Analysis (npfda) R routines ###
### (see Ferraty and Vieu, 2006)                                          ###
#############################################################################
file("http://www.lsp.ups-tlse.fr/staph/npfda/npfda-routinesR.txt",open="r")
source("http://www.lsp.ups-tlse.fr/staph/npfda/npfda-routinesR.txt")
### From now on, the R routines "funopare.knn.gcv" can be used ###
#############################################################################
### Downloading Functional Linear Regression (flr) R routines             ###
### (see Cardot, Ferraty and Sarda, 2003)                                 ###
#############################################################################
file("http://www.lsp.ups-tlse.fr/staph/npfda/flr-routinesR.txt",open="r")
source("http://www.lsp.ups-tlse.fr/staph/npfda/flr-routinesR.txt")
### From now on, the R routines "Splinemlf" can be used ###

#############################################################################
#############################################################################
#############################################################################
######################                          #############################
###################### IMPLEMENTING SIMULATIONS ############################# 
######################                          #############################
#############################################################################
#############################################################################
#############################################################################
p <- 100
n <- 300
nlearn <- 200
ntest <- 100
testing <- sample(n,ntest)
learning <- sample((1:n)[-testing],nlearn)
snr <- 0.05
x <- seq(0,2*pi,length=p)
#############################################################################
#############################################################################
### ONE SIMULATION
#############################################################################
#############################################################################
a <- runif(n,min=0,max=1)
b <- runif(n,min=0,max=1)
c <- rnorm(n,sd=0.5)
d <- runif(n,min=0,max=1)
e <- runif(n,min=0,max=1)
f <- rnorm(n,sd=0.5)
g <- runif(n,min=0,max=1)
CURVES <- a %*% t(cos(4*x)) + b %*% t(cos(5*x)) + c %*% t(cos(6*x)) + d %*% t(sin(5*x)) + e %*% t(sin(6*x)) + f %*% t(sin(7*x))+ g %*% t((x-pi)^2)
#######################
# REGRESSION MODEL 
#######################
Reg.operator= CURVES[,25]+2*CURVES[,30]*CURVES[,65]
sdev <- sqrt(snr * var(Reg.operator))
Error <- rnorm(n,sd=sdev)
Response = Reg.operator + Error
##########################
# LEARNING/TESTING SAMPLES 
##########################
CURVES1 <- CURVES[learning,]     # learning sample of curves
CURVES2 <- CURVES[testing,]      # testing sample of curves
Resp1 <- Response[learning]        # learning sample of responses
Resp2 <- Response[testing]         # testing sample of responses

##################################
### RUNNING STEPWISE ALGORITHM ###
##################################
results <- mpdp(CURVES1,Resp1,nbmax=6, nbbw=10, Grid=(1:20)*5, pcvpar=1/3,  kind.of.kernel="quadratic2")
##############################
### Selected design points ###
##############################
Selectedpoints <- results$Backwardselection
###########################################################################
### Fpcv contains the values of the penalized crossvalidation criterion ###
### obtained after the Forward addition stage (idem for Fcv without     ###
### penalization)                                                       ###
###########################################################################
Fpcv <- results$Fwdpcv
Fcv <- Fpcv/(1+(((1:length(Fpcv))/3)/log(nlearn)))
###############################################################
### Predictions with selected most-predictive design points ###
###############################################################
mspesa <- sum((Resp2-predict(results, CURVES2))^2)/ntest

##############################################################################
# RUNNING FUNCTIONAL NONPARAMETRIC REGRESSION (routine "funopare.knn.gcv") ###
##############################################################################
res0 <- funopare.knn.gcv(Resp1, CURVES1, CURVES2, 0, nknot=20, c(0,1))
mspefnr <- round(sum((res0$Predicted.values-Resp2)^2)/ntest,digit=3)
################################################################
# RUNNING FUNCTIONAL LINEAR REGRESSION (routine "Splinemlf") ###
################################################################
res.fl <- Splinemlf(Resp1, CURVES1, 5e-10, Posintknots=5*(1:19))
### the value 5e-10 was determined via cross-validation procedure
Predvalues <- as.vector(t(res.fl$af) %*% t(CURVES2)/ncol(CURVES2))
mspeflr <- sum((Resp2-Predvalues)^2)/length(Resp2)

###############################################################################
# You can end this simulation by comparing "mspesa" with "mspefnr" or "mspeflr"
# (i.e. the mean squared prediction error obtained respectively by our 
# stepwise algorithm, the functional nonparametric method and the functional
# linear one)
###############################################################################
############################################################################### 

#############################################################################
#############################################################################
#############################################################################
######################                          #############################
######################   CHOPPED PORK DATASET   ############################# 
######################                          #############################
#############################################################################
#############################################################################
#############################################################################
#########################################
### Downloading chopped pork dataset  ###
#########################################
file("http://www.lsp.ups-tlse.fr/staph/npfda/npfda-spectrometric.dat",open="r")
SPECDAT <- read.table("http://www.math.univ-toulouse.fr/staph/npfda/npfda-spectrometric.dat", header=F)
SPECURVES <- SPECDAT[,1:100]           # sample of curves
Response <- SPECDAT[,101]              # sample of responses
### computing 2nd derivative (via spline interpolation) of spectrometric curves ###
SPECD2 <- t(approx.spline.deriv(SPECURVES, 2, 20,  c(0,1),degree=3)$APPROX)
### splitting into learning and testing samples ###
learning <- (1:160)[-35]
testing <- 161:215
SPECURVES1 <- SPECD2[learning,]     # learning sample of curves
SPECURVES2 <- SPECD2[testing,]      # testing sample of curves
Specresp1 <- Response[learning]        # learning sample of responses
Specresp2 <- Response[testing]         # testing sample of responses
##################################
### RUNNING STEPWISE ALGORITHM ###
##################################
results <- mpdp(SPECURVES1, Specresp1, nbmax=10, nbbw=10, Grid=1:100, pcvpar=1,  kind.of.kernel="quadratic2")
###############################################################
### Predictions with selected most-predictive design points ###
###############################################################
mspesa <- sum((Specresp2-predict(results, SPECURVES2))^2)/length(testing)

#############################################################################
#############################################################################
#############################################################################
######################                          #############################
######################   ORANGE JUICE DATASET   ############################# 
######################                          #############################
#############################################################################
#############################################################################
#############################################################################
#########################################
### Downloading orange juice dataset  ###
#########################################
### Learning sample: functional predictors ###
file("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_x_learning.txt",open="r")
OJSPECLEARN <- read.table("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_x_learning.txt", header=F)
### Testing sample: functional predictors ###
file("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_x_test.txt",open="r")
OJSPECTEST <- read.table("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_x_test.txt", header=F)
### Learning sample: scalar response ###
file("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_y_learning.txt",open="r")
OJresplearn <- read.table("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_y_learning.txt", header=F)
### Testing sample: scalar response ###
file("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_y_test.txt",open="r")
OJresptest <- read.table("http://www.ucl.ac.be/mlg/DataBases/ORANGE_JUICE/OJ_y_test.txt", header=F)
### dropping two outliers (unit 130 in learn/unit 44 in test) ###
OJresplearn <- as.vector(OJresplearn[-130,])
OJresptest <- as.vector(OJresptest[-44,])
OJSPECLEARN <- OJSPECLEARN[-130,]
OJSPECTEST <- OJSPECTEST[-44,]
### computing 1st derivative (via spline interpolation) of spectrometric curves ###
OJSPECLEARND1 <- t(approx.spline.deriv(OJSPECLEARN, 1, 50, c(0,1), degree=3)$APPROX)
OJSPECTESTD1 <- t(approx.spline.deriv(OJSPECTEST, 1, 50,  c(0,1),degree=3)$APPROX)
##################################
### RUNNING STEPWISE ALGORITHM ###
##################################
results <- mpdp(OJSPECLEARND1, OJresplearn, nbmax=10, nbbw=10, Grid=1:700, pcvpar=1/3,  kind.of.kernel="quadratic2")
###############################################################
### Predictions with selected most-predictive design points ###
###############################################################
mspesa <- sum((OJresptest-predict(results, OJSPECTESTD1))^2)/length(OJresptest)