#############################################################################
#############################################################################
#############################################################################
###                                                               ###########
###                 R Commandlines for implementing               ########### 
###              analyses given in our paper entitled             ###########
###  Regression when both Response and Predictor are Functions    ###########
###  co-authored by Frederic F., I. Van Keilegom, P. Vieu         ###########
###                     (submitted to JMVA)                       ###########
###                                                               ###########
#############################################################################
#############################################################################
#############################################################################

##############################################################################
##############################################################################
# SIMULATING FUNCTIONAL DATASETS
##############################################################################
##############################################################################
p <- 100
n <- 800
nlearn1 <- 250
nlearn2 <- 250
ntest <- 50
snr <- 0.05
x <- seq(0,pi,length=p)
a <- runif(n,min=0.2,max=2)
omega <- runif(n,min=1.5,max=2.5)
CURVESBIS <- cos(omega %*% t(x)) * a
PROCESS <- matrix(0,n,p)
for(ii in 1:n) 
       PROCESS[ii,] <- cumsum(rnorm(p,mean=0,sd=0.1))
CURVES <- CURVESBIS + PROCESS
##########################################################
# REGRESSION MODEL: Y(t) = int_0^t X_i(u)^2 du + e_i(t)
##########################################################
REG0 = 0.5 * (a^2) %*% t(x) + sin(2 * omega %*% t(x))*(a^2)/(4*omega)
REG = t(apply(CURVES^2,1,cumsum))

Regmean <- apply(REG,2,mean)
COVREG <- (t(REG) - Regmean) %*% t(t(REG) - Regmean)
COVREG <- COVREG/n

sigma2 <- snr * sum(diag(COVREG))/p
ERROR1 <- matrix(rnorm(n*p,sd=sqrt(sigma2)), nrow=n)
Error1.mean <- apply(ERROR1,2,mean)
ERROR1 <- t(t(ERROR1) - Error1.mean)
COVERROR1 <- (t(ERROR1) %*% ERROR1)/n
sum(diag(COVERROR1))

eigCOVREG <- eigen(COVREG)
Values <- eigCOVREG$values
VECTORS <- eigCOVREG$vectors
GAUSSIAN <- matrix(0,p,n)
for(k in 1:p)
      GAUSSIAN[k,] <- rnorm(n, mean=0, sd=sqrt(snr*abs(Values[k])))
Gaussian.mean <- apply(GAUSSIAN, 1, mean)
GAUSSIAN <- GAUSSIAN - Gaussian.mean
ERROR2 <- t(VECTORS %*% GAUSSIAN)
COVERROR2 <- (t(ERROR2) %*% ERROR2)/n
sum(diag(COVERROR2))

ERROR3 <- sqrt(0.7)*ERROR2 + sqrt(0.3)*ERROR1
COVERROR3 <- (t(ERROR3) %*% ERROR3)/n
sum(diag(COVERROR3))
#########################################################################
RESPONSE1 = REG + ERROR1
RESPONSE2 = REG + ERROR2
RESPONSE3 = REG + ERROR3
############################################
############################################
# DISPLAYING THREE SIMULATED CURVES (Fig. 1)
############################################
############################################
par(mfrow=c(1,2))
Index <- c(201,213,222)
matplot(x,t(CURVES[Index,]), main="Functional predictors",type="l",lwd=2,lty=1:3,xlab="",ylab="")
matplot(x,t(RESPONSE3[Index,])/p,main="Functional responses",type="l",lwd=2,lty=1:3,xlab="",ylab="")
####################################################################################
####################################################################################
## DISPALYING THE TWO MAIN ERROR TYPE (SHAPE NOISE + NOISED MEASUREMENTS) - (Fig. 2)
####################################################################################
####################################################################################
par(mfrow=c(1,3),mar=c(2,2,2,1))
Index <- c(201,213,222)
yrange <- range(rbind(RESPONSE1[Index,],RESPONSE2[Index,],RESPONSE3[Index,],REG[Index,]))/p
matplot(x,t(REG[Index,])/p,lwd=3,lty=1,ylim=yrange,type="l",xlab="",ylab="")
matlines(x,t(RESPONSE1[Index,])/p,ylim=yrange,lty=2)
matplot(x,t(REG[Index,])/p,lwd=3,lty=1,ylim=yrange,type="l",xlab="",ylab="")
matlines(x,t(RESPONSE2[Index,])/p,ylim=yrange,lty=2)
matplot(x,t(REG[Index,])/p,lwd=3,lty=1,ylim=yrange,type="l",xlab="",ylab="")
matlines(x,t(RESPONSE3[Index,])/p,ylim=yrange,lty=2)
##################################
##################################
### uploading npfda R routines ###
##################################
##################################
file("http://www.math.univ-toulouse.fr/staph/npfda/npfda-routinesR.txt",open="r")
source("http://www.math.univ-toulouse.fr/staph/npfda/npfda-routinesR.txt")
#################################
#################################
### running ffunopare.knn.gcv ###
#################################
#################################
# Learning sample
RESPONSES.LEARN=RESPONSE3[1:200,] # functional responses
PREDICTORS.LEARN=CURVES[1:200,]   # functional predictors
# Testing functional predictors
PREDICTORS.TEST=CURVES[201:250,]
# Launching the functional nonparametric regression with specified grid for "Knearest" 
# (the algorithm selects the number of neighbours among those given in "Knearest"); 
# the pca semi-metric (4th eigenvectors are retained)
res = ffunopare.knn.gcv(RESPONSE3[1:200,], CURVES[1:200,], CURVES[201:250,], 4, Knearest=5:25, semimetric="pca")
#################################################
# DISPLAYING PREDICTED RESPONSES VS r(X) (Fig. 3)
#################################################
par(mfrow=c(1,1),mar=c(2,2,2,1))
Ind <- c(1,2,3)
ylimits=range(rbind(res$Predicted.values[Ind,],REG[Ind+200,]))/p
matplot(x,t(res$Predicted.values[Ind,])/p,ylim=ylimits,type="l",lty=1,lwd=2,xlab="",ylab="")
matlines(x,t(REG[Ind+200,])/p,ylim=ylimits,lwd=1,lty=2)
####################################################################
# PROJECTING PREDICTIONS ONTO EIGENSPACES BUILT FROM THE COVARIANCE 
# OPERATOR OF THE FUNCTIONAL RESPONSE (LEARNING SAMPLE) - (Fig. 4)
####################################################################
percentage <- round(100*Values/sum(Values),1)
Reg1 <- as.vector(REG %*% VECTORS[,1])
Pred31 <- as.vector(res$Predicted.values %*% VECTORS[,1])
Reg2 <- as.vector(REG %*% VECTORS[,2])
Pred32 <- as.vector(res$Predicted.values %*% VECTORS[,2])
Reg3 <- as.vector(REG %*% VECTORS[,3])
Pred33 <- as.vector(res$Predicted.values %*% VECTORS[,3])
Reg4 <- as.vector(REG %*% VECTORS[,4])
Pred34 <- as.vector(res$Predicted.values %*% VECTORS[,4])
par(mfrow=c(2,2),mar=c(2,2,2,1))
plot(Reg1[201:250]/p, Pred31/p, main=paste("Component 1 (",percentage[1],"%)"))
abline(0,1)
plot(Reg2[201:250]/p, Pred32/p, main=paste("Component 2 (",percentage[2],"%)"))
abline(0,1)
plot(Reg3[201:250]/p, Pred33/p, main=paste("Component 3 (",percentage[3],"%)"))
abline(0,1)
plot(Reg4[201:250]/p, Pred34/p, main=paste("Component 4 (",percentage[4],"%)"))
abline(0,1)
##############################
# SELECTING TESTING SAMPLE 
##############################
Testing <- sample(n,ntest)
##############################
# SELECTING 1st LEARNING SAMPLE 
##############################
Learning1 <- sample((1:n)[-Testing],nlearn1)
##############################################################################
##############################
# SELECTING 2nd LEARNING SAMPLE 
##############################
Learning2 <- (1:n)[-c(Learning1,Testing)]
##############################################################################
CURVES1 <- CURVES[Learning1,]    # learning sample of curves
CURVES2 <- CURVES[Testing,]      # testing sample of curves
RESPONSE=RESPONSE3
RESP1 <- RESPONSE[Learning1,]    # learning sample of responses
RESP2 <- RESPONSE[Testing,]      # testing sample of responses
REG1 <- REG[Learning1,]
REG2 <- REG[Testing,]
##############################################################################
##############################################################################
#  ASYMPTOTIC NORMALITY OF BOOTSTRAP (bootstrapped estimator minus estimator)
##############################################################################
##############################################################################
res <- ffunopare.knn.gcv(RESP1, CURVES1, CURVES2, 4, Knearest=6:14, semimetric="pca")
p <- ncol(CURVES1)
n1 <- nrow(CURVES1)
n2 <- nrow(CURVES2)
u <- sqrt(5)/10
pu <- 0.5+u
bwboot1 <- res$knearest.opt
bwboot2 <- bwboot1
rhat.bwboot1 <- ffunopare.knn(RESP1, CURVES1, CURVES2, bwboot1, 4, semimetric="pca")
##############################
# Computing centered residuals
##############################
RESID <- RESP1 - rhat.bwboot1$Estimated.values 
RESID <- t(t(RESID) - apply(RESID, 2, mean))
###################################
# Drawing "wild-bootstrapped errors
###################################
nb.boot <- 200
ERROR.BOOT <- matrix(0,n2*nb.boot,p)
for(boot in 1:nb.boot){
    cat(paste("iteration=",boot,"\n"))
    U <- runif(n1)
    U[U <= pu] <- 0.5 * (1 - sqrt(5))
    U[U > pu] <- 0.5 * (1 + sqrt(5))
    RESID.BOOT <- RESID * U
    RESID.BOOT <- t(t(RESID.BOOT) - apply(RESID.BOOT,2,mean))
    #######################
    # Drawing new responses
    #######################
    RESP.BOOT <- rhat.bwboot1$Estimated.values + RESID.BOOT
    ##
    # Computing predicted fat content
    #################################
    rhat.bwboot2 <- ffunopare.knn.single(RESP.BOOT, CURVES1, CURVES2, bwboot2, 4, semimetric="pca")
    ERROR.BOOT[((boot-1)*n2+1):(boot*n2),] <- rhat.bwboot2$Predicted.values - rhat.bwboot1$Predicted.values
}
##############################################################################
##############################################################################
# ASYMPTOTIC NORMALITY OF ESTIMATOR MINUS TRUE VALUE  (MONTE-CARLO SCHEME)
##############################################################################
##############################################################################
nb.mc <- 200
ERROR.MC <-matrix(0,n2*nb.mc,p)
for(mc in 1:nb.mc){
###########################################################
# SIMULATING REGRESSION MODEL
###########################################################
       n.mc <- 200
       a.mc <- runif(n.mc,min=0.2,max=2)
       omega.mc <- runif(n.mc,min=1.5,max=2.5)
       CURVESBIS.MC <- cos(omega.mc %*% t(x)) * a.mc
       PROCESS.MC <- matrix(0,n.mc,p)
       for(ii in 1:n.mc) 
              PROCESS.MC[ii,] <- cumsum(rnorm(p,mean=0,sd=0.1))
       CURVES.MC <- CURVESBIS.MC + PROCESS.MC
       ##########################################################
       # REGRESSION MODEL: Y(t) = int_0^t X_i(u)^2 du + e_i(t)
       ##########################################################
       REG.MC = t(apply(CURVES.MC^2,1,cumsum))
       Regmean.mc <- apply(REG.MC,2,mean)
       COVREG.MC <- (t(REG.MC) - Regmean.mc) %*% t(t(REG.MC) - Regmean.mc)
       COVREG.MC <- COVREG.MC/n.mc
       sigma2.mc <- snr * sum(diag(COVREG.MC))/p
       ERROR1.MC <- matrix(rnorm(n.mc*p, mean=0, sd=sqrt(sigma2.mc)), nrow=n.mc)
       Error1.mc.mean <- apply(ERROR1.MC,2,mean)
       ERROR1.MC <- t(t(ERROR1.MC) - Error1.mc.mean)
       eigCOVREG.MC <- eigen(COVREG.MC)
       Values.mc <- eigCOVREG.MC$values
       VECTORS.MC <- eigCOVREG.MC$vectors
       GAUSSIAN.MC <- matrix(0,p,n.mc)
       for(k in 1:p)
             GAUSSIAN.MC[k,] <- rnorm(n.mc, mean=0, sd=sqrt(snr*abs(Values.mc[k])))
       Gaussian.mc.mean <- apply(GAUSSIAN.MC, 1, mean)
       GAUSSIAN.MC <- GAUSSIAN.MC - Gaussian.mc.mean
       ERROR2.MC <- t(VECTORS.MC %*% GAUSSIAN.MC)
       ERROR3.MC <- sqrt(0.7)*ERROR2.MC + sqrt(0.3)*ERROR1.MC
       RESPONSE3.MC = REG.MC + ERROR3.MC
       rhat.bwboot2.mc <- ffunopare.knn.single(RESPONSE3.MC, CURVES.MC, CURVES2, bwboot2, 4, semimetric="pca")
       ERROR.MC[((mc-1)*n2+1):(mc*n2),] <- rhat.bwboot2.mc$Predicted.values - REG2
       cat(paste("iteration=", mc, "\n"))
}
######################
# RESULTS
#####################################################################################
# DISPLAYING COMPONENTWISE COMPARISONS BETWEEN BOOTSTRAP AND MC DISTRIBUTION (Fig. 5)
#####################################################################################
Bern.mc <- sample(c(-1,1),nb.mc, replace = TRUE)
Weight.mc <- rep(Bern.mc, rep(n2,nb.mc))
ERROR.MCC <- ERROR.MC * Weight.mc
ERROR.MC.PROJ <- ERROR.MCC %*% VECTORS/p
Bern.boot <- sample(c(-1,1),nb.boot, replace = TRUE)
Weight.boot <- rep(Bern.boot, rep(n2,nb.boot))
ERROR.BOOTC <- ERROR.BOOT * Weight.boot
ERROR.BOOT.PROJ <- ERROR.BOOTC %*% VECTORS/p
nb.comp <- 4
par(mfrow=c(5,nb.comp), mar=c(2,2,2,1))
for(ind in 1:5){
for(comp in 1:nb.comp){
width=6
Errors.boot <- ERROR.BOOT.PROJ[seq(ind,nb.boot*n2,n2),comp]
Errors.boot.std <- Errors.boot/sqrt(var(Errors.boot))
bandwidth.boot <- 3*bw.nrd(Errors.boot.std)
Density.boot <- density(Errors.boot.std,bw=bandwidth.boot,from=-width,to=width)
Errors.mc <- ERROR.MC.PROJ[seq(ind,nb.mc*n2,n2),comp]
Errors.mc.std <- Errors.mc/sqrt(var(Errors.mc))
bandwidth.mc <- 3*bw.nrd(Errors.mc.std)
Density.mc <- density(Errors.mc.std,bw=bandwidth.mc,from=-width,to=width)
xlimits=range(c(Density.boot$x,Density.mc$x))
ylimits=range(c(Density.boot$y,Density.mc$y))
plot(Density.mc$x, Density.mc$y, xlab="",ylab="",main=paste("curve", ind,"- comp",comp), type="l", xlim=xlimits, ylim=ylimits, lwd=1)
abline(v=0)
par(new=T)
plot(Density.boot$x, Density.boot$y, xlab="",ylab="", type="l", xlim=xlimits, ylim=ylimits, lty=2, lwd=2)
}
}
######################################################################
# COMPONENTWISE BOXPLOTS OF 0-1 SCALE VARIATIONAL DISTANCE BETWEEN  
# DSITRIBUTIONS OF BOOTSTRAPPED ERRORS AND "ESTIMATED" ERRORS (Fig. 6)
######################################################################
VDIST <- matrix(0,n2,nb.comp)
width <- 6
for(ind.unit in 1:n2){
for(comp in 1:nb.comp){
Errors.boot <- ERROR.BOOT.PROJ[seq(ind.unit,nb.boot*n2,n2),comp]
Errors.boot.std <- Errors.boot/sqrt(var(Errors.boot))
bandwidth.boot <- 3*bw.nrd(Errors.boot.std)
Density.boot <- density(Errors.boot.std,bw=bandwidth.boot,from=-width,to=width)
Errors.mc <- ERROR.MC.PROJ[seq(ind.unit,nb.mc*n2,n2),comp]
Errors.mc.std <- Errors.mc/sqrt(var(Errors.mc))
bandwidth.mc <- 3*bw.nrd(Errors.mc.std)
Density.mc <- density(Errors.mc.std,bw=bandwidth.mc,from=-width,to=width)
VDIST[ind.unit, comp] <- 0.5*sum(abs(Density.mc$y-Density.boot$y))*((2*width)/512)
}
}
VDIST <- round(VDIST, digit=3)
par(mfrow=c(1,1), mar=c(3,2,3,1))
boxplot(list(VDIST[,1],VDIST[,2],VDIST[,3],VDIST[,4]),main="Componentwise variational distances at 50 fixed curves", names=paste("comp", 1:nb.comp), ylim=c(0,0.12))
################################################################
# TOWARDS FUNCTIONAL CONFIDENCE AREA 
################################################################
res <- ffunopare.knn.gcv(RESP1, CURVES1, CURVES2, 4, Knearest=6:14, semimetric="pca")
Rhatmean <- apply(res$Estimated.values,2,mean)
COVRHAT <- (t(res$Estimated.values) - Rhatmean) %*% t(t(res$Estimated.value) - Rhatmean)
COVRHAT <- COVRHAT/200
eigCOVRHAT <- eigen(COVRHAT)
Valrhat <- eigCOVRHAT$values
VECRHAT <- eigCOVRHAT$vectors
nb.comp=4
alpha=0.05
par(mfrow=c(3,3), mar=c(2,2,1,1))
for(ind.unit in 1:9){
QUANT <- matrix(0,2,nb.comp)
for(comp in 1:nb.comp){
ERROR.BOOT.PROJ <- ERROR.BOOTC %*% VECRHAT/p
Errors.boot <- ERROR.BOOT.PROJ[seq(ind.unit,nb.boot*n2,n2),comp]
Errors.boot.std <- Errors.boot/sqrt(var(Errors.boot))
bandwidth.boot <- 2*bw.nrd(Errors.boot.std)
nbins <- 2^12
Density.boot <- density(Errors.boot.std,bw=bandwidth.boot,from=-width,to=width,n=nbins)
Probas <- cumsum(Density.boot$y)*(2*width)/nbins
quant.inf <- Density.boot$x[sum(Probas<alpha/(2*nb.comp))]
quant.sup <- Density.boot$x[sum(Probas<1-alpha/(2*nb.comp))]
QUANT[,comp] <- c(quant.inf,quant.sup) * sqrt(var(Errors.boot))
}
res <- ffunopare.knn.gcv(RESP1, CURVES1, CURVES2, 4, Knearest=6:14, semimetric="pca")
Predictions <- t(res$Predicted.values[ind.unit,]) %*% VECRHAT[,1:nb.comp]/p
BANDS <- QUANT + rbind(Predictions,Predictions)
EXTREMA <- matrix(0,2^nb.comp,nb.comp)
for(comp in 1:(nb.comp)){ 
          even <- 2*(1:nb.comp)
          odd <- 2*(0:(nb.comp-1))+1
          reparg1 <- rep(c(odd[comp],even[comp]),2^(comp-1))
          reparg2 <- rep(2^(nb.comp-comp),2^comp)
          EXTREMA[,comp] <- rep(reparg1, reparg2)
}
COEFEXTREMA <- matrix(0,nb.comp,2^nb.comp)
for(ii in 1:(2^nb.comp)) COEFEXTREMA[,ii] <- BANDS[EXTREMA[ii,]]
CURVESEXTREMA <- VECRHAT[,1:nb.comp] %*% COEFEXTREMA
COEFBOUNDS <- apply(CURVESEXTREMA,1,range)
Coefreg2 <- as.vector(t(REG2[ind.unit,]) %*% VECTORS[,1:nb.comp])/p
Reg2curves <- VECTORS[,1:nb.comp] %*% Coefreg2
plot(x,REG2[ind.unit,]/p,lwd=4,type="n",lty=2,ylim=range(CURVESEXTREMA),xlab="",ylab="")
lines(x,COEFBOUNDS[1,],lwd=4)
lines(x,COEFBOUNDS[2,],lwd=4)
polygon(c(x, rev(x)), c(COEFBOUNDS[2,], rev(COEFBOUNDS[1,])), col = "gray", border = NA)
par(new=T)
plot(x,Reg2curves,type="l",lwd=4,ylim=range(CURVESEXTREMA))
par(new=T)
plot(x,REG2[ind.unit,]/p,type="l",lwd=4,lty="dotted",ylim=range(CURVESEXTREMA))
}