/* Robust regression using Iteratively Reweighted Least Squares Idea: Compute weights based on size of residual. Large residuals get small weights, leading to a sort of automatic outlier rejection. This example uses Mallows C-estimators and the Tukey biweight function. Ref: Kingsbury 1987 (Interface, p.301-) See also, SAS/STAT UG, PROC NLIN Example 5 */ title 'Robust Regression using IRLS'; %include data(fuel); /*-------------------------------------------------------------* | Run OLS regression, get regression coefficients & leverage | *-------------------------------------------------------------*/ proc reg data=fuel outest=estim; model fuel = tax drivers road inc ; output out=leverage h=h p=yhat_ols; data param; set estim; /* parameter estimates from proc reg step */ drop intercep fuel tax drivers road inc _depvar_ _model_ _type_ _rmse_ ; if _sigma_=. then _sigma_ = _rmse_; /* V6 */ b0start = intercep; b1start = tax; b2start = drivers; b3start = road; b4start = inc; proc print; /*--------------------------------------* | Merge data with parameter estimates | *--------------------------------------*/ data fuel2; if _n_=1 then set param; set leverage; proc nlin data=fuel2 nohalve; title2 'C1 Biweight Estimator'; parms /* declare the parameters */ b0= 377.291146 b1= -34.790149 b2= 1336.449357 b3= -0.002426 b4= -0.066589; if _iter_ = 0 & _n_ = 1 then do; b0 = b0start; b1 = b1start; b2 = b2start; b3 = b3start; b4 = b4start; end; model fuel = b0 + b1*tax + b2*drivers + b3*road + b4*inc ; der.b0 = 1; /* derivatives of model wrt parameters */ der.b1 = tax; der.b2 = drivers; der.b3 = road; der.b4 = inc; resid = fuel - model.fuel; /* compute residual */ retain b 4.685; /* tuning constant */ r = abs( resid / _sigma_ ); /* scaled residual */ if r <= b /* compute weight */ then _weight_ = (sqrt(1-h)*(1 - r/b)**2)**2; else _weight_ = 0; output out=nlinout r=rbiwt p=yhat_wls; /*-----------------------------------------------* * Compute biweight values from output data set * *-----------------------------------------------*/ data nlinout; set nlinout; drop _sigma_ b b0start b1start b2start b3start b4start; r = abs(rbiwt / _sigma_); b=4.685; if r <=b then _weight_ = (sqrt(1-h)*(1 - r/b)**2)**2; else _weight_ = 0; /*--------------------------------------------------------------* | Print output dataset to see which obs have been downweighted | *--------------------------------------------------------------*/ proc print data=nlinout; id state; var fuel h r rbiwt _weight_ yhat_ols yhat_wls; proc plot; plot _weight_ * h = '*' $ state; run;