STATISTICA







STATISTICA BASIC Program FilComp.stb

{ This program will compare values in selected variables, in different files and point out mismatched values.

Program written, modified, or edited at StatSoft, Inc.}


randomaccess;
NoDataFileVariableNames;
redim var(nvars);
	largeconst:=-1.e30;
	iret:=DisplayMessageBox (MB_OKCANCEL,
	  'Comparing Files',
	  'This program will compare the values for selected variables in two different
files (the current data file, and another data file). The program assumes that the two
files are of identical size. ');
	if iret=IDCANCEL then stop;
{select variables for comparison}
	iret:=SelectVariables1 ('Select Variables for Comparison',
	  1, nvars, var, n, 'Variables:');
	if iret=0 then stop;
{allocate memory for second data file}
	redim data2(ncases,n);
	redim work1(ncases),work2(ncases);
	MatrixFill (largeconst, data2, 1, 1, ncases, n);
{get second data file name}
	if SelectOpenFileName ('Select Second File', FileName$, '.sta')=0
         then stop;
{copy data from second file into array data2}
	for i:=1 to n do begin
	 MatrixFill (-1.e30, work1, 1, 1, ncases, 1);
	 iret:=MatrixReadFromDataFile (FileName$, 1, var(i), ncases, 1, work1);
	 if iret=0 then begin
	  DisplayMessageBox (MB_ICONSTOP,
	   'Cannot Read File',
	   'Cannot read this data file; please check file name.');
        stop;
 	 end;
	 MatrixCopy (work1, 1, 1, ncases, 1, data2, 1, i);
	end;

{ short or detailed results?}
	iret:=DisplayButtonBox ('Summary Report of Mismatches',
	  'Detailed summary of all mismatched cases|Short summary list of cases and the
variable of first mismatch');
	if iret=0 then stop;
	if iret=1 then ilong:=1;
	if iret=2 then ilong:=0;

{ no perform comparison}
	maxnomatch:=1000;
	redim atcase(maxnomatch),atvar(maxnomatch);
	nomatch:=0;
	for icase:=1 to ncases do begin
	 for ivar:=1 to n do begin
	  ok:=1;
	  if data(icase,var(ivar))<>data2(icase,ivar) then
	   ok:=0
{if there are missing data in either one or both files}
	  undefined begin
	   if valid(data(icase,var(ivar)))<>valid(data2(icase,ivar)) then ok:=0;
	  end;
{is there a no-match?}
	  if not ok then begin
	   nomatch:=nomatch+1;
	   if nomatch>maxnomatch then begin
{increase size of atcases, atvar}
	    maxnomatch:=maxnomatch+1000;
	    redim atcase(maxnomatch),atvar(maxnomatch);
	   end;
	   atcase(nomatch):=icase;atvar(nomatch):=ivar;
{continue with next case}
	   ivar:=n;
	  end
	 end;
	end;

{if no mismatches, terminate}
	if nomatch=0 then begin
       DisplayMessageBox (MB_ICONSTOP,
	   'All Values Match',
	   'No mismatches in the selected variables in the two files.');
	 stop;
	end;

	if ilong=0 then begin
{short summary result table}
	  redim table(nomatch,2);
	  kname$:='CaseName|Variable';
	  MatrixCopy (atcase, 1, 1, nomatch, 1, table, 1, 1);
	  MatrixCopy (atvar, 1, 1, nomatch, 1, table, 1, 2);
	  handle:=NewScrollsheet (nomatch, 2, table,
	  'Non-matching cases in selected variables|Only the first non-matching
variables|are listed for each mismatch', ?RowNames$, kname$);
	  ScrollsheetSetColumnName (handle, 1, 'CaseNo/', 'CaseName');
	  ScrollsheetSetColumnFormat (handle, 1, SCF_INTEGER, 8);
	  ScrollsheetSetColumnFormat (handle, 2, SCF_INTEGER, 8);
	  ScrollsheetSetRowNameWidth (handle, 8);
	  for i:=1 to nomatch do begin
	   ScrollsheetSetRowName (handle, i, str(atcase(i),8,0));
	   ScrollsheetSetTextValue (handle, i, 1, casename(atcase(i)));
	   ScrollsheetSetTextValue (handle, i, 2, varname(atvar(i)));
	  end	;
	  stop;
	end;

	if ilong=1 then begin
{long results table}
	 redim table(nomatch,n*2);
	 for i:=1 to nomatch do begin
	  icase:=atcase(i);
	  for j:=1 to n do begin
	   table(i,(j-1)*2+1):=data(icase,var(j));
	   table(i,(j-1)*2+2):=data2(icase,j);
	  end;
	 end;
	 handle:=NewScrollsheet (nomatch, 2*n, table,
	  'Non-matching cases in selected variables|Mismatched values are
highlighted|Casenames/numbers are listed in first column',
		?RowNames$, '');
	 ScrollsheetSetColumnWidth (handle, 8, 1);
	 for i:=1 to n do begin
	  ScrollsheetSetColumnName (handle, (i-1)*2+1, 'File 1', varname(var(i)));
	  ScrollsheetSetColumnName (handle, (i-1)*2+2, 'File 2', varname(var(i)));
	  for j:=1 to nomatch do begin
	   ok:=1;
	   if table(j,(i-1)*2+1)<>table(j,(i-1)*2+2) then ok:=0;
	   if table(j,(i-1)*2+1)=largeconst then
	     ScrollsheetSetTextValue (Handle, j, (i-1)*2+1, '      ');
	   if table(j,(i-1)*2+2)=largeconst then
	     ScrollsheetSetTextValue (Handle, j, (i-1)*2+2, '      ');
	   if valid(table(j,(i-1)*2+1))<>valid(table(j,(i-1)*2+2))then ok:=0;
	   if ok=0 then begin
	    ScrollsheetSetHilite (Handle, j, (i-1)*2+1, 1);
	    ScrollsheetSetHilite (Handle, j, (i-1)*2+2, 1);
	   end;
	  end;
	 end;
	 ScrollsheetSetRowNameWidth (handle, 8);
	 for i:=1 to nomatch do
	  ScrollsheetSetRowName (handle, i, casename(atcase(i)));
	end;
Back to List of Programs



[StatSoft]
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217

[StatSoft]e-mail: info@statsoft.com

©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.