STATISTICA







STATISTICA BASIC Program CorrCI(Pairwise).stb

{ This STATISTICA BASIC program will provide the approximate confidence interval for the correlation coefficients of a list of variables. This is done using a normal approximation. To demonstrate the manner in which the confidence intervals are produced, a graph can be drawn. This will show the full range of possible correlation coefficients and the confidence intervals for those values at the given percentage and number of observations. Missing data can be handled by pairwise deletion in this program. For casewise deletion, see the alternative BASIC program CorrCI(Casewise).stb. Note that if the data does not have missing data the casewise deletion program will be much more efficient.

Program written, modified, or edited at StatSoft, Inc.}


RandomAccess;
NoDataFileVariableNames;

ReDim Selection (NVars);
ReDim TempColumn1 (NCases);

{make sure that the data can be analyzed}

if NCases > 32767 then begin
	DisplayMessageBox (MB_OK, 'Data Too Large',
	'This BASIC program can analyze up to 32767 cases.  The data has too many cases.');
	stop;
end;

{obtain variable list}

If 0 =
SelectVariables1 ('Select Variables for Correlation',
	2, NVars, Selection, NSel, 'Which Variables?')
then STOP;

{obtain the percentage for the confidence interval}

PctCI := 95;

If 0 =
DisplayNumericInputBox ('Confidence Interval', 'Select Percent', PctCI)
then STOP;

{find the total number of combinations that will be calculated}

NCalcs := NSel * (NSel - 1) / 2;

ReDim ResultVector(NCalcs,6);
ReDim DisplayVector(NCalcs,6);
ReDim NameVector(NCalcs,2);
ReDim NewData (NCases, NSel);
ReDim VarFlag (NSel);
ReDim MissFlag (NSel);
ReDim TempRow (NSel);
ReDim CorrResult (NSel,NSel);

{get new data matrix from selected variables}

For i:= 1 to NSel do begin
	MatrixGetColumn (Data, Selection(i), TempColumn1);
	MatrixSetColumn (NewData, i, TempColumn1);
end;

{check for missing data}

For i:= 1 to NSel do begin
	MatrixGetColumn (NewData, i, TempColumn1);
	if NCases - ValCount (TempColumn1, 1, 0) > 0 then MissFlag(i) := 1 else MissFlag(i) := 0;
end;

{deal with missing data}

missingcode := 0;

If MatrixAnyNonZero (MissFlag) > 0 then begin
	missingcode := 1;
	if IDCANCEL = DisplayMessageBox (MB_OKCANCEL, 'Missing Data Found',
		'Chose OK for Pairwise Deletion or Cancel to End') then STOP
end;

{check for variables with no variance}

ReDim TempColumn1 (NCases);
ReDim VarFlag (NSel);

For i:= 1 to NSel do begin
	MatrixGetColumn (NewData, i, TempColumn1);
	ValVariance (TempColumn1, 1, 0, varresult);
	if varresult = 0 then VarFlag(i) := 1 else VarFlag(i) := 0;
end;

{if variables have no variance, remove them from calculations}

If MatrixAnyNonZero (VarFlag) > 0 then begin
	ReDim NewData2 (NCases, NSel);
	varcounter := 1;
	for i := 1 to NSel do
		if VarFlag(i) = 0 then begin
			MatrixGetColumn (NewData, i, TempColumn1);
			MatrixSetColumn (NewData2, varcounter, TempColumn1);
			varcounter := varcounter + 1;
		end;

	ReDim CorrResult ((varcounter-1), (varcounter-1));
	ReDim NewData (NCases, (varcounter-1));
	MatrixCopy (NewData2, 1, 1, 0, 0, NewData, 1, 1);

end;

{get number of valid variables remaining}

ValSum (VarFlag, 1, 0, NNoVar);
NewNSel := NSel - NNoVar;

{get number of calculations for valid variables}

NewNCalcs := NewNSel * (NewNSel - 1) / 2;

{if only one variable has variance then no correlations can be computed}

if NewNSel < 2 then begin
	DisplayMessageBox (MB_OK, 'Error', 'Less Than Two Valid Variables Remain.');
	STOP;
end;

{now extract results and compute confidence interval / p-value}

ReDim TempRow (2);
ReDim TempColumn2 (NCases);
ReDim TempCorrMatrix (NCases, 2);
ReDim TempCorrMatrix2 (NCases, 2);
ReDim TempCorrResult (2, 2);

counter := 1;
for i := 1 to (NewNSel - 1) do begin
	for j := (i + 1) to NewNSel do begin
		MatrixGetColumn (NewData, i, TempColumn1);
		MatrixGetColumn (NewData, j, TempColumn2);
		MatrixCombineHoriz (TempColumn1, TempColumn2, TempCorrMatrix);

{do pairwise deletion}
		paircounter := 1;
		for k:= 1 to NCases do begin
			MatrixGetRow (TempCorrMatrix, k, TempRow);
			If 2 - ValCount (TempRow, 1, 0) = 0 then begin
				MatrixSetRow (TempCorrMatrix2, paircounter, TempRow);
				paircounter := paircounter + 1;
			end;
		end;
	ReDim TempCorrMatrix ((paircounter-1), 2);
	MatrixCopy (TempCorrMatrix2, 1, 1, 0, 0, TempCorrMatrix, 1, 1);


	ResultVector(counter,1) := (paircounter-1);

	ReDim TempColumn1 ((paircounter-1));
	ReDim TempColumn2 ((paircounter-1));

	MatrixGetColumn (TempCorrMatrix, 1, TempColumn1);
	ValVariance (TempColumn1, 1, 0, TempVar1);

	MatrixGetColumn (TempCorrMatrix, 2, TempColumn2);
	ValVariance (TempColumn2, 1, 0, TempVar2);

	if (TempVar1 = 0) or (TempVar2 = 0) then
		TempCorrResult(2,1) := 1/0
	else
		MatrixCorrelations (TempCorrMatrix, 1, TempCorrResult);

	ResultVector(counter,2) := TempCorrResult(2,1);

	zscore := Log ((1 + TempCorrResult(2,1)) / (1 - TempCorrResult(2,1))) / 2;
	minus := TempCorrResult(2,1) / (2 * (ResultVector(counter,1) - 1));
	range := (VNormal ((1 - ((100 - PctCi)/200)), 0, 1)) / (Sqrt (ResultVector(counter,1) - 3));

	transLCL := zscore - minus - range;
	transUCL := zscore - minus + range;

	if TempCorrResult (1,2) > -.999999999999999 then begin
		ResultVector(counter,3) := (Exp (2 * transLCL) - 1) / (Exp (2 * transLCL) + 1);
		ResultVector(counter,4) := (Exp (2 * transUCL) - 1) / (Exp (2 * transUCL) + 1);
	end else begin
		ResultVector(counter,3) := 1/0;
		ResultVector(counter,4) := 1/0;
	end;

	if Valid (TempCorrResult(2,1)) = 0 then begin
		ResultVector(counter,1) := TempCorrResult(2,1);
		ResultVector(counter,3) := TempCorrResult(2,1);
		ResultVector(counter,4) := TempCorrResult(2,1);
	end;

	ResultVector(counter,5) := TempCorrResult(1,2) * Sqrt (ResultVector(counter,1) - 2) /
		(Sqrt (1 - (TempCorrResult(1,2)*TempCorrResult(1,2))));
	ResultVector(counter,6) := 2 *
		(1 - IStudent (abs(ResultVector(counter,5)), (ResultVector(counter,1) - 2)));

	counter := counter + 1;

	ReDim TempColumn1 (NCases);
	ReDim TempColumn2 (NCases);
	ReDim TempCorrMatrix (NCases, 2);
	end;
end;

displaycounter := 1;
resultcounter := 1;

{tidy results for display}

for i := 1 to (NSel-1) do
	for j := (i+1) to NSel do begin
		NameVector (displaycounter, 1) := i;
		NameVector (displaycounter, 2) := j;
		if (VarFlag(i) = 1) or (VarFlag(j) = 1) then begin
			DisplayVector (displaycounter, 1) := 1/0;
			DisplayVector (displaycounter, 2) := 1/0;
			DisplayVector (displaycounter, 3) := 1/0;
			DisplayVector (displaycounter, 4) := 1/0;
			DisplayVector (displaycounter, 5) := 1/0;
			DisplayVector (displaycounter, 6) := 1/0;
		end
		else begin
			DisplayVector (displaycounter, 1) := ResultVector (resultcounter, 1);
			DisplayVector (displaycounter, 2) := ResultVector (resultcounter, 2);
			DisplayVector (displaycounter, 3) := ResultVector (resultcounter, 3);
			DisplayVector (displaycounter, 4) := ResultVector (resultcounter, 4);
			DisplayVector (displaycounter, 5) := ResultVector (resultcounter, 5);
			DisplayVector (displaycounter, 6) := ResultVector (resultcounter, 6);
			resultcounter := resultcounter + 1;
		end;
		if DisplayVector (displaycounter, 1) < 3 then begin
			DisplayVector (displaycounter, 1) := 1/0;
			DisplayVector (displaycounter, 2) := 1/0;
			DisplayVector (displaycounter, 3) := 1/0;
			DisplayVector (displaycounter, 4) := 1/0;
			DisplayVector (displaycounter, 5) := 1/0;
			DisplayVector (displaycounter, 6) := 1/0;
		end;
		displaycounter := displaycounter + 1;
	end;

{make the Scrollsheet}

if missingcode = 0 then TopTitle$ := 'Correlations and CI|'
	else TopTitle$ := 'Correlations and CI (Pairwise Deletion)|';

TopTitle$ := TopTitle$ + 'Highlighted Correlations do not include|Zero in the '
	+ Str (PctCI, 2, 0) + '% Confidence Interval';

display := NewScrollsheet (NCalcs, 6, DisplayVector, TopTitle$, ?RowNames$,
	'N|Corr.|-' + Str (PctCI, 2, 0) + '% CI|+' + Str (PctCI, 2, 0) + '% CI|t-score| ');

{format the Scrollsheet}

ScrollsheetSetRowNameWidth (display, 18);
ScrollsheetSetColumnFormat (display, 1, SCF_INTEGER, 5);
ScrollsheetSetColumnName (display, 6, '2-sided', 'p-value');

{make row labels and highlighting where appropriate}

for i := 1 to NCalcs do begin
	NoLetters := Len (VarName (Selection(NameVector(i,1))));
	if NoLetters = 8 then ThisCaseName$ := VarName (Selection(NameVector(i,1))) + '  '
		+ VarName (Selection(NameVector(i,2)))
	else begin
		ThisCaseName$ := '';
		for j := NoLetters to 7 do ThisCaseName$ := ThisCaseName$ + ' ';
		ThisCaseName$ := ThisCaseName$ + VarName (Selection(NameVector(i,1))) + '  '
		+ VarName (Selection(NameVector(i,2)));
	end;
	ScrollsheetSetRowName (display, i, ThisCaseName$);
	ScrollsheetGetValue (display, i, 3, val1);
	if val1 > 0 then ScrollsheetSetHilite (display, i, 2, 1);
	ScrollsheetGetValue (display, i, 4, val2);
	if val2 < 0 then ScrollsheetSetHilite (display, i, 2, 1);
	ScrollsheetGetValue (display, i, 2, val3);
	if (val3 = 1) or (val3 < -.9999999999999)	then ScrollsheetSetHilite (display, i, 2, 1);
	ScrollsheetGetValue (display, i, 6, val4);
	if val4 < (1 - (PctCI / 100)) then ScrollsheetSetHilite (display, i, 6, 1);
end;



{make text list to offer all N in the graph}

ReDim ValidNCases(NewNCalcs);
ReDim CasesToOffer(NewNCalcs);

MatrixExtract (ResultVector, 1, 1, NewNCalcs, 1, ValidNCases);

VectorSort (ValidNCases, SORT_ASCENDING);

ShowCounter := 1;
for i := 1 to NewNCalcs do begin
	if i = 1 then
		if ValidNCases(1) > 2 then begin
			CasesToOffer(ShowCounter) := ValidNCases(1);
			ShowCounter := ShowCounter + 1;
		end;
	if i > 1 then
		if (ValidNCases(i) > ValidNCases(i - 1)) and (ValidNCases(i) > 2) then begin
			CasesToOffer(ShowCounter) := ValidNCases(i);
			ShowCounter := ShowCounter + 1;
		end;
end;

ReDim CasesToOffer(ShowCounter - 1);

{create a graph to show the bulge in the confidence limits}

for i := 1 to (ShowCounter - 1) do
		if CasesToOffer (i) > 0 then ValidText$ := ValidText$ + Str (CasesToOffer(i), 8, 0) + '|';

NEXTGRAPH:

ChosenEntry := 0;

if 0 = DisplaySelectionBox ('Choose Number of Cases to Display', ValidText$, ChosenEntry, 1)
	then STOP;

GetDelimitedString (ValidText$, ChosenEntry, ChosenString$);

ChosenNCases := Val (ChosenString$);

ReDim RValue (251);
ReDim NewLRValue (251);
ReDim NewRValue (251);
ReDim NewURValue (251);


for i := 0 to 250 do begin

	r := i/125 - 1;

	z := Log((1+r)/(1-r))/2;
	minus := r/(2*(ChosenNCases-1));
	u := VNormal ((1-((100-PctCI)/200)), 0, 1);
	range := u/(Sqrt (ChosenNCases-3));


{Z transformed range}
	UCL := z-minus+range;
	LCL := z-minus-range;

{transform back}
	rUCL := (Exp(2*UCL)-1)/(Exp(2*UCL)+1);
	rLCL := (Exp(2*LCL)-1)/(Exp(2*LCL)+1);

	RValue (i+1) := r;
	NewLRValue (i+1) := rLCL;
	NewRValue (i+1) := r;
	NewURValue (i+1) := rUCL;

end;

CIgraph := NewGraph (LINEPLOT, 'Display of Confidence Limit Ranges|'
	+ Str (ChosenNCases, 6, 0) + ' Cases at ' + Str (PctCI, 4, 1) + '% Confidence Interval',
	?Title$, ?Title$, 251, RValue, NewRValue);

GraphAddPlot (CIgraph, LINEPLOT, ?Name$, 251, RValue, NewLRValue);
GraphAddPlot (CIgraph, LINEPLOT, ?Name$, 251, RValue, NewURValue);

GraphSetScaling (CIgraph, AX_X, SCALING_MANUAL_0, -1, 1, .2);
GraphSetScaling (CIgraph, AX_Y, SCALING_MANUAL_0, -1, 1, .2);

GraphSetPlotLineStyle (CIgraph, 1, ON, L_SOLID, ?Size, RED);
GraphSetPlotLineStyle (CIgraph, 2, ON, L_SHORT_DASH, ?Size, BLACK);
GraphSetPlotLineStyle (CIgraph, 3, ON, L_SHORT_DASH, ?Size, BLACK);

DisplayGraph (CIgraph);

GOTO NEXTGRAPH;

Back to List of Programs



[StatSoft]
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217

[StatSoft]e-mail: info@statsoft.com

©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.