The maximum number of variables is 10; to increase program limits change the respective constant MAXV at the beginning of the program.randomaccess;
NoDataFileVariableNames;
delta:=1.e-10; { tolerance check of lambda against 0}
toler:=.001; { tolerance check for convergence }
maxtable:=30; { maximum number of rows in Scrollsheet}
redim table(maxtable,6);
maxc:=ncases;
maxv:=10; { max is 10 variables; change if necessary}
maxiter:=40; { max no. of iterations is 40; change if necessary}
Sub BCTransform(ygeometricmean,yhat,nc,lam)
{Box-Cox transformation}
begin
dim yhat(nc);
for i:=1 to nc do begin
if abs(lam)<delta then
yhat(i):=log(yhat(i))*ygeometricmean
else
yhat(i):=(yhat(i)^lam-1)/(lam*ygeometricmean^(lam-1));
end;
end;
Function GetBracketsForSearch(l)
begin
dim l(2);
l(1):=-2;
l(2):=2;
iret:=0;
if DisplayNumericInputBox ('Range of lambda',
'Lower limit:|Upper limit:', l)=0 then begin
iret:=1;
goto thatsit;
end;
if l(1)>-.05 then begin
DisplayMessageBox (MB_ICONSTOP, 'Invalid lower lambda',
'Lower lambda value must < -.05.');
iret:=1;
end;
if l(2)<.05 then begin
DisplayMessageBox (MB_ICONSTOP, 'Invalid upper lambda',
'Upper lambda value must > .05.');
iret:=1;
end;
if (abs(l(1))>10) or (abs(l(2))>10) then begin
DisplayMessageBox (MB_ICONSTOP, 'Invalid lambda values',
'User-specified lambda values too large.');
iret:=1;
end;
thatsit:;
GetBracketsForSearch:=iret;
end;
Function MissingDataorYminltzero(nc,var,n,dv)
begin
dim var(n);
MissingDataorYminltzero:=0;
iflag:=0;
for i:=1 to n do begin
j:=ValCount (data(1,var(i)), 1, ncases);
if j<ncases then iflag:=1;
end;
j:=ValCount (data(1,dv), 1, ncases);
if j<ncases then iflag:=1;
if iflag=1 then begin
DisplayMessageBox(MB_ICONSTOP, 'Missing Data',
'Some cases have missing data; this program requires that all cases are'+
' complete; use the missing data replacement facilities to remove missing'+
' data values.');
MissingDataorYminltzero:=1;
goto thatsit;
end;
ValMin (data(1,dv), 1, ncases, ymin);
if ymin<=0 then begin
DisplayMessageBox(MB_ICONSTOP, 'Invalid Y Values',
'Some y<=0; all dependent variable values must be greater than 0.');
MissingDataorYminltzero:=1;
end;
thatsit:;
end;
Sub Regression(x,nc,maxv,var,n,yhat,xx,dv,lam,delta,ygeometricmean)
begin
dim yhat(nc);
dim x(nc,maxv);
dim xx(maxv,maxv);
dim var(maxv);
{ set first column in X to 1 for intercept}
MatrixFill (1,x,1,1, nc, 1);
{ copy data to x}
for i:=1 to n do MatrixCopy (data, 1, var(i), nc, 1, x, 1, i+1);
{ copy y}
MatrixCopy (data, 1, dv, ncases, 1, yhat, 1, 1);
{Box-Cox transformation}
BCTransform(ygeometricmean,yhat,ncases,lam);
MatrixCopy (yhat, 1, 1, nc, 1, x, 1, n+1+1);
{compute X'X}
MatrixCrossProductOfDev (x, 0, xx);
{ sweep on xx}
MatrixSweep (xx, 1, n+1, 1);
end;
Sub GridSearch(maxiter,x,maxv,var,n,yhat,xx,l,byref lam,
byref ygeometricmean, dv,delta,iprint,
xplot1,xplot2,yplot1,yplot2)
begin
dim xplot1(20);
dim yplot1(20);
dim xplot2(2);
dim yplot2(2);
dim yhat(ncases);
dim x(ncases,maxv);
dim xx(maxv,maxv);
dim var(maxv);
dim l(2);
{manual/visual optimization of lambda}
{compute geometric mean}
MatrixCopy (data, 1, dv, ncases, 1, yhat, 1, 1);
ygeometricmean:=0;
for i:=1 to ncases do ygeometricmean:=ygeometricmean+log(yhat(i));
ygeometricmean:=exp(ygeometricmean/ncases);
{create Scrollsheet to report progress of search }
line01$:='Searching for|smallest lambda';
colnam$:='Lambda| SSE ';
{start iterations}
bestss:=1e+30;
bestlam:=0;
for iter:=1 to 20 do begin
lam:=l(1)+(iter-1)*(l(2)-l(1))/19;
Regression(x,ncases,maxv,var,n,yhat,xx,dv,lam,delta,ygeometricmean);
sigma:=xx(n+2,n+2);
xplot1(iter):=lam;
yplot1(iter):=sigma;
table(iter,1):=lam;
table(iter,2):=sigma;
if sigma<bestss then begin
bestss:=sigma;
bestlam:=lam;
end;
if iter=1 then begin
shandle:=NewScrollsheet (iter, 2, table, line01$, ?rownam$, colnam$);
end else begin
DeleteScrollsheet (shandle);
shandle:=NewScrollsheet (iter, 2, table, line01$, ?rownam$, colnam$);
end;
end;
{print Scrollsheet if printing was requested}
if iprint=1 then PrintScrollsheet (shandle);
{graph of lambda versus SSE(lambda)}
Graph:=Newgraph (LINEPLOT, 'Lambda versus SSE(lambda)', 'SSE(lambda)',
'Lambda', 20, xplot1, yplot1);
GraphSetPlotPointStyle (Graph, 1, ON, ?Pattern, ?Size, ?Color);
GraphSetTitle (Graph, TITLE_TOP2, 'Dependent variable: '+VarName(dv));
line01$:='Indep.: ';
for i:=1 to min(n,6) do line01$:=line01$+VarName(var(i))+' ';
if n>6 then line01$:=line01$+'...';
GraphSetTitle (Graph, TITLE_TOP3, line01$);
GraphSetTitle (Graph, TITLE_TOP4,
'The intersection of the 95% C.I line with the SSE line');
GraphSetTitle (Graph, TITLE_TOP5,
'marks the 95% confidence limits for the best lambda');
{determine confidence line for smallest SSE;
see Mason, Gunst, Hess, page 557}
df:=ncases-n-1;
t:=VStudent (.95, df);
xplot2(1):=l(1);
yplot2(1):=bestss*(1+t*t/df);
xplot2(2):=l(2);
yplot2(2):=yplot2(1);
GraphAddPlot (Graph, LINEPLOT, '', 2, xplot2, yplot2);
GraphSetScaleTextLabels (Graph, AX_RY, 1, yplot2(1), '95% C.I.');
{GraphSetScaleValuesStyle (Graph, AX_RY, SCALEVALUES_NUMERIC,
?NumWidth, ?NumDec, ?DateType, ?SkippedLabels);}
{display the graph}
DisplayGraph (Graph);
if iprint=1 then PrintGraph (Graph);
lam:=bestlam;
end;
Sub Goldensearch(maxiter,x,maxv,var,n,yhat,xx,l,byref lam,
byref ygeometricmean, dv,delta,iprint)
begin
dim yhat(ncases);
dim x(ncases,maxv);
dim xx(maxv,maxv);
dim var(maxv);
dim l(2);
{ begin iterative search (golden search of bracketed function) }
ii:=0;
r:=.61803399;c:=1.-r;
x0:=l(1);x3:=l(2);x2:=0;
if abs(l(2))>abs(l(1)) then begin
x1:=0;x2:=c*l(2);
end else begin
x2:=0;x1:=c*l(1);
end;
lamold:=0;
{compute geometric mean of y}
MatrixCopy (data, 1, dv, ncases, 1, yhat, 1, 1);
ygeometricmean:=0;
for i:=1 to ncases do ygeometricmean:=ygeometricmean+log(yhat(i));
ygeometricmean:=exp(ygeometricmean/ncases);
line01$:='Searching for|smallest lambda';
colnam$:='Lambda| SSE ';
{start iterations}
for iter:=1 to maxiter do begin
if iter=1 then lam:=x1;
if iter=2 then lam:=x2;
Regression(x,ncases,maxv,var,n,yhat,xx,dv,lam,delta,ygeometricmean);
{ compute f(l)}
sigma:=xx(n+2,n+2);
fl:=sigma;
{ display scrollsheet of iterations}
irow:=iter;
if iter>maxtable then begin
MatrixCopy (table, 2, 1, maxtable-1, 2, table, 1, 1);
rownam$:=SDelete (rownam$, 1, 4);
irow:=maxtable;
end;
table(irow,1):=lam;table(irow,2):=sigma;
if iter=1 then begin
shandle:=NewScrollsheet (iter, 2, table, line01$, ?rowname$, colnam$);
end else begin
DeleteScrollsheet (shandle);
shandle:=NewScrollsheet (irow, 2, table, line01$, ?rowname$, colnam$);
end;
{ first and second iteration:}
if iter=1 then f1:=fl;
if iter=2 then f2:=fl;
{DEBUG}
{ writeln('lam:',lam,'sigma:',sigma,'iter:',iter);}
{ENDDEBUG}
{find new point}
if iter>2 then begin
if ii=1 then f2:=fl;
if ii=2 then f1:=fl;
if iprint=1 then begin
writeln('Iteration:',iter,
'lambda=',lam,
'SSE=',sigma);
end;
if abs(x3-x0)>toler*(abs(x1)+abs(x2)) then begin
{ compute new lambda}
if f2<f1 then begin
x0:=x1;x1:=x2;x2:=r*x1+c*x3;
f0:=f1;f1:=f2;
lam:=x2;
ii:=1;
end else begin
x3:=x2;x2:=x1;x1:=r*x2+c*x0;
f3:=f2;
f2:=f1;
lam:=x1;
ii:=2;
end;
end else begin
lam:=x2;
if f1<f2 then lam:=x1;
{stop iterations}
if iprint=1 then begin
writeln(' ');
writeln('Iterations converged after ',iter,' iterations');
writeln('Final value of lambda=',lam);
writeln('Final value of SSE=',sigma);
writeln(' ');
end;
goto thatsit;
end;
end;
end;
thatsit:;
end;
{======================================================================================}
{ main program }
{======================================================================================}
{ allocate memory for arrays}
{dimension global array table }
redim l(2);
redim xplot1(20);
redim yplot1(20);
redim xplot2(2);
redim yplot2(2);
redim b(maxv);
redim yhat(ncases);
redim ytransf(ncases);
redim error(ncases);
redim error0(ncases);
redim x(ncases,maxv);
redim xx(maxv,maxv);
redim var(maxv);
{intro; and find out whether or not to print results}
iret:=DisplayMessageBox (MB_YESNOCANCEL,
'Box-Cox Transformation',
'This program will estimate the maximum likelihood lambda for the '+
'Cox-Box transformation of the dependent variable. Do you want to '+
'print all results?');
if iret=IDCANCEL then stop;
iprint:=0;
if iret=IDYES then iprint:=1;
{ select variables}
if SelectVariables2 ('Select Variables',
1, 1, dv, i,'Dependent variable:',
1, maxv-1-1, var, n, 'Independent variables:')=0 then stop;
{check for missing data }
if MissingDataorYminltzero(ncases,var,n,dv) then stop;
{ get values ("brackets") for lambda}
if GetBracketsForSearch(l) then stop;
{ Give user the choice of either graphical method of optimization,
or use iterative search (Golden Search) }
imethod:=DisplayButtonBox ('Select a Search Method',
'Manual/visual search (graph of lambda vs. SSE)|Iterative optimization '+
'(golden search)');
if imethod=0 then stop;
if imethod=2 then begin
{ iterative search via golden search method}
Goldensearch(maxiter,x,maxv,var,n,yhat,xx,l,lam,
ygeometricmean,dv,delta,iprint);
end else begin
GridSearch(maxiter,x,maxv,var,n,yhat,xx,l, lam,
ygeometricmean, dv,delta,iprint,
xplot1,xplot2,yplot1,yplot2);
end;
{allow user to round lambda (enter user-specified value) }
if (DisplayNumericInputBox ('Specify lambda value',
'lambda', lam)=0) then stop;
{ recompute SSE(lambda) for lambda=1 (i.e., no transformation;
simple linear relationship); also store error for later plot}
MatrixFill (1,x,1,1, ncases, 1);
for i:=1 to n do MatrixCopy (data, 1, var(i), ncases, 1, x, 1, i+1);
MatrixCopy(data,1,dv,ncases,1,x,1,n+1+1);
{compute X'X}
MatrixCrossProductOfDev (x, 0, xx);
{ sweep on xx}
MatrixSweep (xx, 1, n+1, 1);
sigma1:=xx(n+2,n+2);
{ compute errors for lambda=1 (observed-predicted)}
for i:=1 to ncases do begin
yhat(i):=0;
for j:=1 to n+1 do yhat(i):=yhat(i)+x(i,j)*xx(n+2,j);
end;
MatrixSubtract (data(1,dv), yhat, error0);
{ recompute sse(lambda) for final lambda}
MatrixFill (1,x,1,1, ncases, 1);
for i:=1 to n do MatrixCopy (data, 1, var(i), ncases, 1, x, 1, i+1);
MatrixCopy (data, 1, dv, ncases, 1, yhat, 1, 1);
{Box-Cox transformation}
BCTransform(ygeometricmean,yhat,ncases,lam);
MatrixCopy (yhat, 1, 1, ncases, 1, x, 1, n+1+1);
{compute X'X}
MatrixCrossProductOfDev (x, 0, xx);
{ sweep on xx}
MatrixSweep (xx, 1, n+1, 1);
sigma:=xx(n+2,n+2);
{ make final scrollsheet; note that the Chi-square test statistic pertains to the
hypothesis that lambda=1 }
rownam$:='Final statistics';
colnam$:='lambda| SSE(l) |ChiČ(1)| p ';
table(1,1):=lam;
table(1,2):=sigma;
{see Maddala, p. 316 }
table(1,3):=-2*log((sigma/sigma1)^(ncases/2));
table(1,4):=1-IChi2 (table(1,3), 1);
table(1,5):=0;
table(1,6):=0;
line01$:='Summary Statistics for Box-Cox Lambda|';
line02$:='Dependent variable: '+varname(dv);
line02$:=line02$+'|Indep.:';
for i:=1 to min(n,5) do
line02$:=line02$+' '+varname(var(i));
if n>5 then line02$:=line02$+'...';
shandle:=NewScrollsheet (1, 4, table,
line01$+line02$,
rownam$, colnam$);
if iprint=1 then PrintScrollsheet (shandle);
{produce final summary graphs of observed vs. residuals for lambda=1,
and best (userdefined) lambda}
{ recompute sse(lambda) for final lambda, using 26.8. in Mason, Gunst, Hess}
MatrixFill (1,x,1,1, ncases, 1);
for i:=1 to n do MatrixCopy (data, 1, var(i), ncases, 1, x, 1, i+1);
MatrixCopy (data, 1, dv, ncases, 1, yhat, 1, 1);
for i:=1 to ncases do begin
{Power transformation (see Mason, Gunst, Hess, 26.8}
if abs(lam)<delta then
yhat(i):=log(yhat(i))
else
yhat(i):=yhat(i)^lam;
end;
MatrixCopy (yhat, 1, 1, ncases, 1, x, 1, n+1+1);
MatrixDuplicate (yhat, ytransf);
{compute X'X}
MatrixCrossProductOfDev (x, 0, xx);
{ sweep on xx}
MatrixSweep (xx, 1, n+1, 1);
sigma:=xx(n+2,n+2);
for i:=1 to ncases do begin
yhat(i):=0;
for j:=1 to n+1 do yhat(i):=yhat(i)+x(i,j)*xx(n+2,j);
end;
MatrixSubtract (ytransf, yhat, error);
{first make blank graph
{note; residuals for lambda=1 are in error0; for lambda=lam in error; yhat has
transformed dependent variable values; make embedded multiple graph}
graph:=NewGraph (IGNOREDPLOT, 'Observed vs. Residual Values|'+line02$,
?Left_Title$, ?Bottom_Title$,
0, yhat, yhat);
graph1:=NewGraph (SCATTERPLOT, 'Lambda=1 (No Transformation)',
'Residuals', 'Observed Values', ncases, data(1,dv), error0);
graph2:=NewGraph (SCATTERPLOT, 'Lambda='+Str (lam, 6, 4),
'Residuals', 'Observed Values, @btransformed@b', ncases,
ytransf, error);
GraphSetPlotFitting (Graph1, 1, fit_linear);
GraphSetPlotFitting (Graph2, 1, fit_linear);
GraphSetDefaultFont (graph1, ?FontName$, 14, ?Color);
GraphSetDefaultFont (graph2, ?FontName$, 14, ?Color);
GraphEmbedGraph (Graph, graph1, false, false, ?MappingMode,
0, 0, 50, 80, false);
GraphEmbedGraph (Graph, graph2, false, false, ?MappingMode,
50, 0, 100, 80, false);
DisplayGraph (Graph);
if iprint=1 then PrintGraph (Graph);
{Normal probability plot of residuals}
{first make blank graph}
graph:=NewGraph (IGNOREDPLOT,
'Normal Probability Plots of Residuals|'+line02$,
?Left_Title$, ?Bottom_Title$,
0, yhat, yhat);
VectorSort (error0, SORT_ASCENDING);
MatrixDuplicate (error0, yhat);
VectorRank (yhat, SORT_ASCENDING, RANK_MEAN);
for i:=1 to ncases do begin
yhat(i):=(3*yhat(i)-1)/(3*ncases+1);
yhat(i):=VNormal (yhat(i), 0, 1);
end;
line01$:='Lambda=1 (No Transformation)';
graph1:=NewGraph (SCATTERPLOT, line01$,
'z-Value','Residual', ncases, error0, yhat);
GraphSetPlotFitting (graph1, 1, FIT_LINEAR);
VectorSort (error, SORT_ASCENDING);
MatrixDuplicate (error, yhat);
VectorRank (yhat, SORT_ASCENDING, RANK_MEAN);
for i:=1 to ncases do begin
yhat(i):=(3*yhat(i)-1)/(3*ncases+1);
yhat(i):=VNormal (yhat(i), 0, 1);
end;
line01$:='Lambda='+Str (lam, 6, 4);
graph2:=NewGraph (SCATTERPLOT, line01$,
'z-Value','Residual', ncases, error, yhat);
GraphSetPlotFitting (graph2, 1, FIT_LINEAR);
GraphSetDefaultFont (graph1, ?FontName$, 14, ?Color);
GraphSetDefaultFont (graph2, ?FontName$, 14, ?Color);
GraphEmbedGraph (Graph, graph1, false, false, ?MappingMode,
0, 0, 50, 80, false);
GraphEmbedGraph (Graph, graph2, false, false, ?MappingMode,
50, 0, 100, 80, false);
DisplayGraph (Graph);
if iprint=1 then PrintGraph (Graph);
| Back to List of Programs |
![[StatSoft]](../../../images/sssmall.gif)
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217
e-mail: info@statsoft.com
©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.