When the scrollsheet is saved as a data file, cases with outliers can easily be excluded from an analysis by a case selection condition on OUTLIER? (exclude if OUTLIER?=1).
Program written, modified, or edited at StatSoft, Inc.}
randomaccess;

{Variable selection: the user picks the variables to check for outliers.
 The script stops when the selection call returns 0; the analysis loop later
 reads the chosen variable numbers from varlist(1) .. varlist(count).}
redim varlist(NVars);
if (SelectVariables1 ("Outlier Analysis: Please pick the variables", 1, NVars, VarList, count, ?Text1$))=0 then abort;

{Outlier definition: how many standard deviations from the mean, 2 by default.
 The script stops when the input box call returns 0.}
coeff := 2;
if (DisplayNumericInputBox
    ('Please define outliers', 'Outliers are how many standard deviations from the mean?', coeff))=0 then abort;
{A negative multiplier would place the lower limit (mean - coeff*std) above the
 upper limit (mean + coeff*std), so every case would be reported as an outlier.
 Treat the entry as a distance and use its magnitude.}
if (coeff < 0) then coeff := 0 - coeff;

{Lets the user specify where the output is headed:
 1 = scrollsheets, 2 = Text/Output window, 0 stops the script}
output_choice := DisplayButtonBox ('Direct output to where?', 'Scrollsheets|Text/Output Window');
if (output_choice = 0) then abort;

{Scrollsheet output only: the data file is displayed as a scrollsheet whose
 columns carry the variable names, and one extra column named OUTLIER? is
 appended to code the cases that have outliers}
if (output_choice = 1) then
begin
    datatitle$ := 'Marked cases have a value > ' + str(coeff,4,2) + ' std devs from the mean';
    datahandle := MatrixDisplay (Data, datatitle$);
    for n:=1 to NVars do
        ScrollsheetSetColumnName (datahandle, n, VarName(n), ?Name2$);
    ScrollsheetAppendColumns (datahandle, 1);
    ScrollsheetSetColumnName (datahandle, Nvars+1, "OUTLIER?", ?title2$);
end;
{Main loop: the outlier analysis is performed once for every selected variable.
 For each variable it computes the mean and standard deviation, derives the
 limits mean -/+ coeff*std, collects the case numbers outside those limits in
 the outliers array, and reports them either in scrollsheets (output_choice = 1)
 or in the Text/Output window (output_choice = 2).}

{work array for the values of the current variable; its size is the same for
 every variable, so it is dimensioned once}
redim varvalues (NCases);

for numvars := 1 to count do
begin
    {read the values of the current variable into the work array}
    for counter :=1 to NCases do
    begin
        varvalues(counter) := Data(counter,varlist(numvars));
    end;

    {compute mean and standard deviation}
    ValMean (varvalues, 1, NCases,mean);
    ValStDeviation (varvalues, 1, NCases, std);

    {compute the valid range for the chosen variable given the user's selection of
     the number of standard deviations from the mean}
    low := (mean - (coeff*std));
    high := (mean + (coeff*std));

    {checking for outliers, pass 1: count the cases outside the valid range}
    number_outliers := 0;
    for counter2 := 1 to NCases do
    begin
        if ((varvalues(counter2) < low) or (varvalues(counter2) > high)) then
            number_outliers := number_outliers + 1;
    end;

    {checking for outliers, pass 2: size the list once and store the case numbers.
     Sizing once avoids one redim per outlier and does not depend on redim
     keeping the entries that were stored earlier.}
    if (number_outliers > 0) then redim outliers(number_outliers);
    outlier_slot := 0;
    for counter2 := 1 to NCases do
    begin
        if ((varvalues(counter2) < low) or (varvalues(counter2) > high)) then
        begin
            outlier_slot := outlier_slot + 1;
            outliers(outlier_slot) := counter2;
        end;
    end;

    {creates array and stores descriptive info about the variable}
    redim var_info(4);
    var_info(1):=mean;
    var_info(2):=std;
    var_info(3):=high;
    var_info(4):=low;

    {Lets the user know if there are no outliers for the specified variable}
    if ((number_outliers = 0) and (output_choice = 1)) then
        DisplayMessageBox (MB_IconExclamation, 'Outlier Analysis', 'No outliers were found for '+
            Varname(varlist(numvars)));

    {Scrollsheet Output}
    {takes care of initial variable: creates the descriptives scrollsheet and the
     scrollsheet that lists the cases with outliers}
    desc_title$ := 'Descriptives & Values within ' + str(coeff,4,2) + ' Standard Dev(s) of the Mean';
    if ((output_choice = 1) and (numvars = 1)) then
    begin
        desc_handle := NewScrollsheet (4, 1, var_info, desc_title$,
            ?rownames$, Varname(varlist(numvars)));
        outlier_title$ := "Cases with Outliers";
        {NOTE(review): when the first variable has no outliers this call asks for
         0 rows and passes an outliers array that was never dimensioned -
         confirm that NewScrollsheet accepts that}
        outlier_handle := NewScrollsheet(number_outliers,1, outliers, outlier_title$,?RowNames$,
            Varname(varlist(numvars)));
        ScrollsheetSetRowNameWidth (desc_handle, 20);
        ScrollsheetSetColumnName (outlier_handle, numvars,Varname(varlist(numvars)),'Cases');
        {row labels for the limits: the multiplier is inserted at position 7 of the template}
        high_temp$ := 'Mean +*s.d.';
        high$ := sinsert(high_temp$,str(coeff,4,2),7);
        low_temp$ := 'Mean -*s.d.';
        low$ := sinsert(low_temp$, str(coeff,4,2),7);
        ScrollsheetSetRowName (desc_handle, 1, 'Mean');
        ScrollsheetSetRowName (desc_handle, 2, 'Standard Deviation');
        ScrollsheetSetRowName (desc_handle, 3, high$);
        ScrollsheetSetRowName (desc_handle, 4, low$);
        ScrollsheetSetColumnFormat (outlier_handle, numvars, SCF_integer, 4);
        {mark the outlying cells in the data scrollsheet and set OUTLIER? to 1}
        for d:=1 to number_outliers do
        begin
            ScrollsheetSetHilite (datahandle, outliers(d), Varlist(numvars), 1);
            ScrollsheetSetValue (datahandle, outliers(d), nvars+1, 1);
        end;
    end;

    {takes care of subsequent variables: one more column in each result scrollsheet}
    If ((output_choice = 1) and (not(numvars = 1))) then
    begin
        ScrollsheetAppendColumns (outlier_handle, 1);
        ScrollsheetSetColumnName (outlier_handle, numvars,Varname(varlist(numvars)),'Cases');
        ScrollsheetSetColumnFormat (outlier_handle, numvars, SCF_integer, 4);
        {add rows when this variable has more outliers than the scrollsheet has rows}
        Current_size := ScrollsheetGetNbRows (outlier_handle);
        If (number_outliers > current_size) then
            ScrollsheetAppendRows (outlier_handle, number_outliers - current_size);
        for c:=1 to number_outliers do
        begin
            ScrollsheetSetValue (outlier_handle, c, numvars, outliers(c));
        end;
        {mark the outlying cells in the data scrollsheet and set OUTLIER? to 1}
        for e:=1 to number_outliers do
        begin
            ScrollsheetSetHilite (datahandle, outliers(e), varlist(numvars), 1);
            ScrollsheetSetValue (datahandle, outliers(e), nvars+1, 1);
        end;
        ScrollsheetAppendColumns (desc_handle, 1);
        ScrollsheetSetColumnName (desc_handle, numvars,?Name$ ,Varname(varlist(numvars)) );
        ScrollsheetSetValue (desc_handle, 1, Numvars, mean);
        ScrollsheetSetValue (desc_handle, 2, Numvars, std);
        ScrollsheetSetValue (desc_handle, 3, Numvars, high);
        ScrollsheetSetValue (desc_handle, 4, Numvars, low);
    end;

    {Text/Output window output}
    if (output_choice = 2) then
    begin
        Writeln("");
        Writeln ("OUTLIER ANALYSIS OUTPUT");
        Writeln("");
        Writeln("Variable: ",Varname(varlist(numvars)));
        {each literal that follows a number starts with a blank so that the
         fields do not run together}
        Writeln ("Mean: ", mean, " Standard deviation: ", std);
        Writeln ("Values within +/- ", coeff, " standard deviations are from ", low, " to ", high);
        Writeln("");
        {list the outliers collected above instead of scanning the data file again}
        for counter2 := 1 to number_outliers do
        begin
            writeln ("Case number: ", outliers(counter2), " (Case Name: ", CaseName(outliers(counter2)),")", " is an outlier for ",
                varname(varlist(numvars)));
        end;
        if (number_outliers = 0) then
            writeln ("No outliers were found for ", varname(varlist(numvars)));
    end;
end;
{Closing message for scrollsheet output. The message text is kept in a single
 string literal on one line; it was previously broken across two source lines.}
if (output_choice = 1) then
    DisplayMessageBox (MB_IconExclamation, 'Please note', 'If you selected more than 1 variable, you will need to resize the scrollsheets to view the output.');
| Back to List of Programs |
![[StatSoft]](../../../images/sssmall.gif)
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217
e-mail: info@statsoft.com
©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.