function [x,Ax,funct,ell2_it,ell1_it,ell0_it] = bpdn_it(P,lambda,nbIter,Anorm)
% [x,Ax,funct,ell2_it,ell1_it,ell0_it] = bpdn_it(P,lambda,nbIter,Anorm)
%
% Runs nbIter iterations of the Iterative Thresholding (IT) algorithm to
% approximate a minimizer x of
%            norm(P.A*x - P.b)^2 + lambda * sum(abs( x ))
% where lambda is a strictly positive number and P is a SPARCO problem.
%
% Inputs :
% P      : a problem (at the SPARCO format). Only P.b (the data) and P.A
%          (the operator, called as P.A(v,mode)) are used here.
% lambda : strictly positive regularization weight. If lambda <= 0 a message
%          is printed and every output is returned empty.
% nbIter : the number of iterations
% Anorm  : operator norm of P.A (it should be computed offline with the
%          function get_operator_norm)
%
% The results are :
% x       : the coordinates
% Ax      : the reconstruction from the coordinates
% funct   : a signal containing the value of the functional which is minimized,
%           along the iterative process
% ell2_it : a signal containing  sqrt( mean( (P.A*x - P.b).^2 )), along the iterative process
% ell1_it : a signal containing   mean( abs( x ) ), along the iterative process
%           (x being the rescaled coordinates, see below)
% ell0_it : a signal containing  100*mean( abs(x) > 1e-10 ), along the iterative process
%
% funct, ell2_it, ell1_it and ell0_it are row vectors whose entry 1 is the
% value at the initialization and whose entry k+1 is the value after iteration k.
% One line of statistics is printed per iteration.


% --------------------------------- Argument check ------------------------------------

if lambda <= 0,
    fprintf('lambda should be strictly positive \n');
    % Assign every output so that a caller requesting outputs gets a clean
    % (empty) return instead of an "output argument not assigned" error.
    x = [];
    Ax = [];
    funct = [];
    ell2_it = [];
    ell1_it = [];
    ell0_it = [];
    return;
end;

% --------------------------------- Creation of data ----------------------------------

b = P.b;

siz=P.A(P.b,0);              %%% number of rows and columns in P.A
                             %%% (mode 0 is assumed to be the SPARCO "info" query -- TODO confirm)
sizeP=cell2mat(siz(1,2));    %%% length of the coefficient vector x
sizeN=cell2mat(siz(1,1));    %%% only used to normalize the functional in the display

% ----------------------------------- Run  IT --------------------------------------------

% The operator norm is not computed here: it is provided by the caller (Anorm).

Beta=Anorm/0.999; %%% Theoretically, the norm of A must be strictly smaller than 1 (once the elements
              %%% of the dictionary have been rescaled), so we divide by 0.999.
invBeta = 1/Beta; %%% loop invariant, computed once

nIter = numel(1:nbIter);     %%% number of iterations the loop below actually performs

x = zeros(sizeP,1);
Ax = P.A(x,1);    %%% useless, unless the initialization is modified
Ax=invBeta*Ax;    %%% Following what is said in our paper, IT is run on the dictionary (1/Beta)\psi_i
                  %%% Ax should therefore be multiplied by (1/Beta)

% Preallocate the statistics (row vectors, one entry for the initialization
% plus one per iteration) instead of growing them inside the loop.
funct   = zeros(1,nIter+1);
ell0_it = zeros(1,nIter+1);
ell1_it = zeros(1,nIter+1);
ell2_it = zeros(1,nIter+1);

%%% few statistics                            %%% The final coordinates would be (1/Beta)*x if the
                                              %%% algorithm were to stop here,
                                              %%% we modify the l1 stat accordingly.
funct(1) = sum((Ax-b).^2)+lambda*invBeta*sum(abs(x));
ell0_it(1) = 100/length(x)*sum(abs(x)>1e-10);
ell1_it(1) = sum(abs(x))/(length(x)*Beta);
ell2_it(1) = sqrt(mean((Ax-b).^2));


for k = 1:nIter,
    err = b-Ax(:);               %%% current residual
    ATerr = P.A(err,2);          %%% mode 2 is presumably the adjoint of P.A -- TODO confirm
    Temp = invBeta*ATerr+x;      %%% gradient (Landweber) step on the rescaled dictionary

    x = soft_threshold(Temp,2*Beta,lambda); % soft-thresholding
                                 %%% the soft thresholding is applied with the threshold lambda/(2*Beta)
                                 %%% (soft_threshold is defined elsewhere; argument meaning taken from
                                 %%% this description -- TODO confirm)
                                 %%% the 2 comes from the paper on IT
                                 %%% the Beta comes from the fact that, when you apply the 'trick' proposed
                                 %%% in our paper, you need to apply IT with lambda/Beta
    Ax = P.A(x,1);
    Ax = invBeta*Ax;             %%% Again, IT is run on the dictionary (1/Beta)\psi_i

    %%% few statistics

    funct(k+1) = sum((Ax-b).^2)+lambda*invBeta*sum(abs(x));
    ell0_it(k+1) = 100/length(x)*sum(abs(x)>1e-10);
    ell1_it(k+1) = sum(abs(x))/(length(x)*Beta); %%% The final coordinates would be (1/Beta)*x if the
                                                 %%% algorithm were to stop here,
                                                 %%% we modify the l1 stat accordingly.
    ell2_it(k+1) = sqrt(mean((Ax-b).^2));

    %%% Display the statistics (the functional is displayed divided by sizeN)
    fprintf('IT Iteration: %i  funct: %10.5f, [L2,L1,L0]:  %10.5f %10.5f %10.5f  \n',k,funct(k+1)/sizeN,ell2_it(k+1),ell1_it(k+1),ell0_it(k+1));

end;

%%% Go back to the coordinates in the original (non rescaled) dictionary,
%%% so that the returned Ax equals P.A applied to the returned x.
x=invBeta*x;

