% ### EXresampleLinearFit.m ### 11.16.14 {C. Bergevin} % Resampling approach to demonstrate means to estimate uncertainty % ** WITHOUT REPLACEMENT ** % [see also EXregression1.m] clear % ------------------------------------- xR= [0 2]; % specify range of x-values xNum= 100; % number of 'data' points % choose parameters of linear function (y=aD+bD*x+noise) aD= 0.15; % intercept bD= 0.44; % slope noiseF= 0.1; % noise factor {0.1} M= 100; % # of resamples range= 1/3; % range to vary span over [0,1] % ------------------------------------- data.x= linspace(xR(1),xR(2),xNum); data.y= aD+ bD*data.x + noiseF*randn(numel(data.x),1)'; % determine noisy (straight) line disp(['==================================']); disp(['Base function: y= a+b*x= ',num2str(aD),' + ',num2str(bD),'*x (+ Gaussian noise)']); N= numel(data.x); % total number of points % ------------ % Linear regression via built-in Matlab function to ENTIRE data set [p0,S]= polyfit(data.x,data.y,1); % also calculate r^2 (coefficient of determination) via % http://www.mathworks.com/help/matlab/data_analysis/linear-regression.html?refresh=true yfit = polyval(p0,data.x); % this line is equivalent to > yfit = p(1) * x + p(2); yresid = data.y - yfit; SSresid = sum(yresid.^2); SStotal = (length(data.y)-1) * var(data.y); RS0= 1 - SSresid/SStotal; disp(['polyfit to entire data set: y= ',num2str(p0(2)),' + ',num2str(p0(1)),'*x (r^2 = ',num2str(RS0),')']); % ------------ % Resampling: regression on randomly chosen subsets of the data for nn=1:M temp= randperm(N); % create randomresampling index tINDX= temp(1:ceil(range*N)); % trim to subset of specified size [p,k]= polyfit(data.x(tINDX),data.y(tINDX),1); % fit for subset fP(nn,:)= p; % store values away end msg = ['Bootstrapped fit (+/- STD): y =',num2str(mean(fP(:,2))),' (',num2str(std(fP(:,2))),... ') + x*',num2str(mean(fP(:,1))),' (',num2str(std(fP(:,1))),')']; disp(msg); % ------------ % visualize figure(1); clf; plot(data.x,data.y,'r.'); % original 'data' points hold on; grid on; xlabel('x'); ylabel('y'); axis([xR(1) xR(2) min(data.y)-0.25 max(data.y)+0.25]); plot(data.x,p0(2)+p0(1)*data.x,'g-','LineWidth',3); % fit to entire data set plot(data.x,mean(fP(:,2))+mean(fP(:,1))*data.x,'k--','LineWidth',2); % fit from bootstrapped calculation legend('noisy data','polyfit to entire set','mean resampled trend','Location','NorthWest')