STATISTICA







STATISTICA BASIC Program Wls.stb

{ This program will perform a weighted least-squares analysis for the current data file. For computational details, see Neter, Wasserman, & Kutner (1985), page 168; see also the Examples section in the Nonlinear Estimation chapter of the manual.

NOTE: program assumes that all cases are complete (no missing data!);

The program will also compute various alternative R-square statistics; see Kvålseth, 1985, JAS, 39, p. 279-285.

Program written, modified, or edited at StatSoft, Inc.
}

randomaccess;
NoDataFileVariableNames;
	if DisplayMessageBox (MB_OKCANCEL, 'Weighted Least Squares',
		'This program will compute weighted least squares multiple regression
estimates. It is assumed that there are no missing data in the selected variables (if
necessary, use the data management facilities to replace missing data with
means).')=IDCANCEL
		then stop;

	redim var(nvars-1);
{select variables: }
	if (SelectVariables3 ('Select Variables for Weighted Least Squares',
	 1, 1, idv, i, 'Dependent:',
       1, nvars-1, var, n, 'Independent:',
	 1, 1, idw, j, 'Weight var:')=0) then stop;
	 iret:=DisplayMessageBox (MB_YESNOCANCEL, 'Intercept?',
		'Estimate the intercept? (No=force to zero)');
	 if iret=IDCANCEL then stop;
	 if iret=IDYES then inter:=1;
	 if iret=IDNO  then inter:=0;
{check for missing data (data must be complete!) }
	 for i:=1 to ncases do begin
	  iflag:=0;
	  for j:=1 to n do begin
	   if valid(data(i,var(j)))=0 then iflag:=1;
	  end;
	  if valid(data(i,idv))=0 then iflag:=1;
	  if valid(data(i,idw))=0 then iflag:=1;
	  if iflag=1 then i:=ncases;
	 end;
	 if iflag=1 then begin
	  DisplayMessageBox (MB_ICONSTOP, 'Missing Data',
	   'Some cases contain missing data; this program assumes that all cases are
complete.');
	  stop;
	 end;
{redimension arrays for analysis}
	 redim b(n+inter);
	 redim means(n);
	 redim x(ncases,n+inter);
	 redim xprime(n+inter,ncases);
	 redim y(ncases);
	 redim mat(n+inter,n+inter);
	 redim xscrat(n+inter,ncases);
	 redim table(max(n+inter,10),6);
	 redim predicted(ncases);
	 redim Residuals(ncases);
{Fill x}
{set 1 in first column of x for intercept (if requested) }
	if inter=1 then MatrixFill (1, x, 1, 1, ncases, 1);
{copy data for independent variables}
	for i:=1 to n do
	  MatrixCopy (data, 1, var(i), ncases, 1, x, 1, inter+i);
	MatrixTranspose (x, xprime);
	MatrixCopy (data, 1, idv, ncases, 1, y, 1, 1);
{compute X'W, where W is the diagonal weight matrix}
	for i:=1 to ncases do begin;
	 for j:=1 to n+inter do
	  xscrat(j,i):=xprime(j,i)*data(i,idw);
	end;
{compute xscrat*X}
	MatrixMultiply (xscrat,x,mat);
{invert}
	MatrixInverse (mat, mat);
{compute mat*X'}
	MatrixMultiply (mat,xprime,xscrat);
{*W}
	for i:=1 to ncases do begin
	 for j:=1 to n+inter do
	  xscrat(j,i):=xscrat(j,i)*data(i,idw);
	end;
{*Y}
	MatrixMultiply (xscrat,y,b);
{mse }
	mse:=0;mstotal:=0;
	ValMean (y, 1, ncases, mean);
	for i:=1 to ncases do begin;
	 predicted(i):=0;
	 if inter=1 then predicted(i):=b(1);
	 for j:=1 to n do
	   predicted(i):=predicted(i)+b(j+inter)*data(i,var(j));
	 residuals(i):=y(i)-predicted(i);
	 mse:=mse+data(i,idw)*residuals(i)^2;
	end;
	dfe:=ncases-n-inter;
	mse:=mse/dfe;

{regression summary}
	line01$:='Coefficients, DV: '+varname(idv);
	line01$:=line01$+'|Indep:';
	for i:=1 to min(5,n) do line01$:=line01$+' '+varname(var(i));
	if n>5 then line01$:=line01$+'...';
	line01$:=line01$+'|Weight: '+varname(idw);
      kname2$:='Coeff | Std.Err. |t('+Str(dfe, 5, 0)+') | p |-95% Cnf|+95% Cnf';
	t:=VStudent (.975, dfe);
	for i:=1 to n+inter do begin;
	  table(i,1):=b(i);                     			{regression coefficients }
	  table(i,2):=sqrt(mse*mat(i,i));   		      {standard error of coeff.}
	  table(i,3):=table(i,1)/table(i,2);                  {t-value                 }
	  table(i,4):=(1-IStudent (abs(table(i,3)), dfe))*2;  {p-value}
	  table(i,5):=table(i,1)-t*table(i,2);			{confidence interval, lower}
	  table(i,6):=table(i,1)+t*table(i,2);			{confidence interval, upper}
	end;

	shandle:=NewScrollsheet (n+inter, 6, table, line01$, '', kname2$);
{highlight significant effects; set rownames}
	ScrollsheetSetRowNameWidth (shandle, 8);
	if inter=1 then ScrollsheetSetRowName (shandle, 1, 'Interc.');
	for i:=1 to n+inter do begin
	  if i-inter>0 then
          ScrollsheetSetRowName (shandle, i, VarName (var(i-inter)));
	 if table(i,4)<.05 then begin
	  for j:=1 to 6 do ScrollsheetSetHilite (shandle, i, j, 1);
	 end;
	end;
{
Compute various alternative R-square statistics;
  see Kvålseth, 1985, JAS, 39, p. 279-285
}
      kname$:='';
	line01$:='Alternative R-Square Statistics';
	line01$:=line01$+'|(see Kvålseth, 1985, JAS, 39, p. 279-285)';
	MatrixSetToZero (table);
{Compute necessary quantities}
	ValMean (residuals, 1, ncases, ebar);
	ValMean (y, 1, ncases, ybar);
	ValMean (predicted, 1, ncases, yhatbar);
	redim work1(ncases);
	MatrixSubtract (y, predicted, work1);
	MatrixSumOfSquares (work1, yminusyhat);
	MatrixElemSubtract (predicted, ybar, work1);
	MatrixSumOfSquares (work1, yhatminusybar);
	MatrixElemSubtract (predicted, yhatbar, work1);
	MatrixSumOfSquares (work1, yhatminusyhatbar);
	MatrixElemSubtract (residuals, ebar, work1);
	MatrixSumOfSquares (work1, eminusebar);
	MatrixElemSubtract (y, ybar, work1);
	MatrixSumOfSquares (work1, yminusybar);

{R12 : 1-Sum((y-yhat)^2)/Sum((y-ybar)^2}
	kname$:='R-square 1';
	table(1,1):=1-yminusyhat/yminusybar;

{R22 : 1-Sum((yhat-ybar)^2)/Sum((y-ybar)^2}
	kname$:=kname$+'|R-square 2';
	table(2,1):=1-yhatminusybar/yminusybar;

{R32 : 1-Sum((yhat-yhatbar)^2)/Sum((y-ybar)^2}
	kname$:=kname$+'|R-square 3';
	table(3,1):=1-yhatminusyhatbar/yminusybar;

{R42 : 1-Sum((e-ebar)^2)/Sum((y-ybar)^2; where e=y-yhat}
	kname$:=kname$+'|R-square 4';
	table(4,1):=1-eminusebar/yminusybar;

{R72 : squared correlation between y and y-hat}
	kname$:=kname$+'|R-square 6';
	table(5,1):=0;
	redim mat(2,2);
	redim x(ncases,2);
	MatrixCopy (y, 1, 1, ncases, 1, x, 1, 1);
	MatrixCopy (predicted, 1, 1, ncases, 1, x, 1, 2);
	MatrixCorrelations (x, 1, mat);
	table(5,1):=mat(2,1)*mat(2,1);

{Make Scrollsheet}
	NewScrollsheet (5, 1, table, line01$, kname$, 'R-Square');

{Plot of case number versus residuals}
	line01$:='Case No. vs. Residual Values';
	kname$:='Case Number';
	for i:=1 to ncases do work1(i):=i;
	graph:=NewGraph (SCATTERPLOT, line01$, 'Residuals',kname$, ncases,
		work1, residuals);
	GraphSetPlotFitting (graph, 1, FIT_POLYNOMIAL);

{Plot of Observed versus Predicted Values}
	line01$:='Observed vs. Predicted Values';
	kname$:='Observed Values';
	graph:=NewGraph (SCATTERPLOT, line01$, 'Predicted Values',kname$, ncases,
		data(1,idv), predicted);
	GraphSetPlotFitting (graph, 1, FIT_LINEAR);

{Normal probability plot of residuals}
	MatrixCopy(residuals,1,1,ncases,1,work1,1,1);
	VectorSort (work1, SORT_ASCENDING);
	MatrixDuplicate (work1, residuals);
	VectorRank (work1, SORT_ASCENDING, RANK_MEAN);
	for i:=1 to ncases do begin
	 work1(i):=(3*work1(i)-1)/(3*ncases+1);
	 work1(i):=VNormal (work1(i), 0, 1);
	end;
	line01$:='Normal Probability plot of Residuals';
	graph:=NewGraph (SCATTERPLOT, line01$,
          'z-Value','Residual', ncases, residuals, work1);
	GraphSetPlotFitting (graph, 1, FIT_LINEAR);
Back to List of Programs



[StatSoft]
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217

[StatSoft]e-mail: info@statsoft.com

©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.