3 Fcom compares two files in either a line-by-line mode or in a strict
6 The byte-by-byte mode is simple; merely read both files and print the
7 offsets where they differ and the contents.
9 The line compare mode attempts to isolate differences in ranges of lines.
10 Two buffers of lines are read and compared. No hashing of lines needs
11 to be done; hashing only speedily tells you when things are different,
12 not the same. Most files run through this are expected to be largely
13 the same. Thus, hashing buys nothing.
16 ***********************************************************************
17 The algorithm that immediately follows does not work. There is an error
18 somewhere in the range of lines 11 on. An alternative explanation follows.
20 ************************************************************************
23 [1] If both buffers are empty then
25 [2] Adjust buffers so 1st differing lines are at top.
26 [3] If buffers are empty then
29 This is the difficult part. We assume that there is a sequence of inserts,
30 deletes and replacements that will bring the buffers back into alignment.
35 [7] If buffer1[xc] and buffer2[yp] begin a "sync" range then
36 [7.1] Output lines 1 through xc-1 in buffer 1
37 [7.2] Output lines 1 through yp-1 in buffer 2
38 [7.3] Adjust buffer 1 so line xc is at beginning
39 [7.4] Adjust buffer 2 so line yp is at beginning
41 [8] If buffer1[xp] and buffer2[yc] begin a "sync" range then
42 [8.1] Output lines 1 through xp-1 in buffer 1
43 [8.2] Output lines 1 through yc-1 in buffer 2
44 [8.3] Adjust buffer 1 so line xp is at beginning
45 [8.4] Adjust buffer 2 so line yc is at beginning
51 [10.3] if xc > number of lines in buffer 1 then
52 [10.4] xc = number of lines
57 [11.3] if yc > number of lines in buffer 2 then
58 [11.4] yc = number of lines
60 [12] if not xd or not yd then
63 At this point there is no possible match between the buffers. For
66 [13] Display error message.
70 This is a variation of the Largest Common Subsequence problem. A
71 detailed explanation of this can be found on p 189 of Data Structures
72 and Algorithms by Aho Hopcroft and Ulman.
76 FC maintains two buffers within which it tries to find the Largest Common
77 Subsequence (The largest common subsequence is simply the pattern in
78 buffer1 that yields the most matches with the pattern in buffer2, or the
79 pattern in buffer2 that yields the most matches with the pattern in buffer1)
81 FC makes a simplifying assumption that the contents of one buffer can be
82 converted to the contents of the other buffer by deleting the lines that are
83 different between the two buffers.
85 Two indices into each buffer are maintained:
87 xc, yc == point to the last line that has been scanned up to now
89 xp, yp == point to the first line that has not been exhaustively
90 compared to lines 0 - #c in the other buffer.
92 FC now makes a second simplifying assumption:
93 It is unnecessary to do any calculations on lines that are equal.
95 Hence FC scans File1 and File two line by line until a difference is
99 When a difference is encountered the two buffers are filled such that
100 the line containing the first difference heads the buffer. The following
101 exhaustive search algorithm is applied to find the first "sync" occurance.
102 (The below is simplified to use == for comparison. In practice more than
103 one line needs to match for a "sync" to be established).
106 FOR xc,yc = 1; xc,yx <= sizeof( BUFFERS ); xc++, yc++
108 FOR xp,yp = 1; xp,yp <= xc,yc; xp++, yp++
110 IF ( BUFFER1[xp] == BUFFER2[yc] )
112 Then the range of lines BUFFER1[ 1 ... xp ] and
113 BUFFER2[ 1 ... yc ] need to be deleted for the
114 two files to be equal. Therefore DISPLAY these
115 ranges, and begin scanning both files starting at
119 IF ( BUFFER1[yp] == BUFFER2[xc] )
121 Then the range of lines BUFFER2[ 1 ... yp ] and
122 BUFFER1[ 1 ... xc ] need to be deleted for the
123 two files to be equal. Therefore DISPLAY these
124 ranges, and begin scanning both files starting at
130 If a match is not found within the buffers, the message "RESYNC FAILED"
131 is issued and further comparison is aborted since there is no valid way
132 to find further matching lines.
143 Certain flags may be set to modify the behavior of the comparison:
145 -a abbreviated output. Rather than displaying all of the modified
146 ranges, just display the beginning, ... and the ending difference
147 -b compare the files in binary (or byte-by-byte) mode. This mode is
148 default on .EXE, .OBJ, .LIB, .COM, .BIN, and .SYS files
149 -c ignore case on compare (cmp = strcmpi instead of strcmp)
150 -l compare files in line-by-line mode
151 -lb n set the size of the internal line buffer to n lines from default
153 -w ignore blank lines and white space (ignore len 0, use strcmps)
154 -t do not untabify (use fgets instead of fgetl)
155 -n output the line number also
156 -NNNN set the number of lines to resynchronize to n which defaults
157 to 2. Failure to have this value set correctly can result in
166 with default sync of 2 yields: with sync => 3 yields:
190 This program makes use of GOTO's and hence is not as straightforward
191 as it could be! CAVEAT PROGRAMMER.
213 /* #define DEBUG FALSE */
218 extern byte toupper ();
220 int (*funcRead) (), /* function to use to read lines */
221 (*fCmp) (); /* function to use to compare lines */
232 int ctSync = -1, /* number of lines required to sync */
233 cLine = -1; /* number of lines in internal buffs */
235 flagType fAbbrev = FALSE, /* abbreviated output */
236 fBinary = FALSE, /* binary comparison */
237 fLine = FALSE, /* line comparison */
238 fNumb = FALSE, /* display line numbers */
239 fCase = TRUE, /* case is significant */
240 fIgnore = FALSE; /* ignore spaces and blank lines */
244 flagType fDebug = FALSE;
247 struct lineType *buffer1,
250 byte line[MAXARG]; /* single line buffer */
252 byte *extBin[] = { ".EXE", ".OBJ", ".LIB",
253 ".COM", ".BIN", ".SYS", NULL };
271 extern byte _osmajor, _osminor;
272 word version; /* _osmajor._osminor, used for */
273 /* version binding checks. */
277 /* Issue error message if DOS version is not within valid range. */
278 version = ((word)_osmajor << 8) + (word)_osminor;
279 if (( LOWVERSION > version) || (version > HIGHVERSION))
284 funcRead = (int (*) ())FNADDR(fgetl);
288 for (i=1; i < c ; i++)
291 * If argument doesn't begin with a /, parse a filename off of it
292 * then examine the argument for following switches.
297 slash= strpbrk( v[i],"/" );
303 strcpy(n[fileargs++],v[i]);
307 strcpy(n[fileargs++],v[i]);
310 for ( j=0 ; j < strlen( v[i] ) ; j++)
314 switch(toupper( *(v[i]+j+1)))
334 if (toupper(*(v[i]+j+2))=='B')
336 cLine = ntoi ((v[i]+j+3),10);
346 funcRead =(int (*) ())FNADDR(fgets);
349 if (*strbskip((v[i]+j+1),"0123456789") == 0)
351 ctSync = ntoi ((v[i]+j+1), 10);
359 } /* end parse of argument for '/' */
360 } /* End ARGUMENT Search */
375 if (!fBinary && !fLine)
377 extention (n[0], line);
379 for (i = 0; extBin[i]; i++)
380 if (!strcmpi (extBin[i], line))
387 if (fBinary && (fLine || fNumb))
393 fCmp = FNADDR(strcmps);
395 fCmp = FNADDR(strcmpis);
400 fCmp = FNADDR(strcmp);
402 fCmp = FNADDR(strcmpi);
406 BinaryCompare (n[0], n[1]);
408 LineCompare (n[0], n[1]);
416 printf ("fc: %s\n", p);
423 BinaryCompare (f1, f2)
424 unsigned char *f1, *f2;
433 if ((fh1 = fopen (f1, "rb")) == NULL)
435 sprintf (line, BadOpn, f1, error ());
439 if ((fh2 = fopen (f2, "rb")) == NULL)
441 sprintf (line, BadOpn, f2, error ());
448 if ((c1 = getc (fh1)) != EOF)
450 if ((c2 = getc (fh2)) != EOF)
457 printf ("%08lX: %02X %02X\n", pos, c1, c2);
462 sprintf (line, LngFil, f1, f2);
468 if ((c2 = getc (fh2)) == EOF)
477 sprintf (line, LngFil, f2, f1);
485 /* compare a range of lines */
486 flagType compare (l1, s1, l2, s2, ct)
493 printf ("compare (%d, %d, %d, %d, %d)\n", l1, s1, l2, s2, ct);
496 if (ct == 0 || s1+ct > l1 || s2+ct > l2)
504 printf ("'%s' == '%s'? ", buffer1[s1].text, buffer2[s2].text);
507 if ((*fCmp)(buffer1[s1++].text, buffer2[s2++].text))
527 unsigned char *f1, *f2;
530 int l1, l2, i, xp, yp, xc, yc;
531 flagType xd, yd, fSame;
536 if ((fh1 = fopen (f1, "rb")) == NULL)
538 sprintf (line, BadOpn, f1, error ());
542 if ((fh2 = fopen (f2, "rb")) == NULL)
544 sprintf (line, BadOpn, f2, error ());
548 if ((buffer1 = (struct lineType *)malloc (cLine * (sizeof *buffer1))) == NULL ||
549 (buffer2 = (struct lineType *)malloc (cLine * (sizeof *buffer1))) == NULL)
558 printf ("At scan beginning\n");
561 l1 += xfill (buffer1+l1, fh1, cLine-l1, &line1);
562 l2 += xfill (buffer2+l2, fh2, cLine-l2, &line2);
564 if (l1 == 0 && l2 == 0)
572 for (i=0; i < xc; i++)
574 if (!compare (l1, i, l2, i, 1))
581 l1 = adjust (buffer1, l1, i);
582 l2 = adjust (buffer2, l2, i);
584 /* KLUDGE ALERT!! GOTO USED */
585 if (l1 == 0 && l2 == 0)
588 l1 += xfill (buffer1+l1, fh1, cLine-l1, &line1);
589 l2 += xfill (buffer2+l2, fh2, cLine-l2, &line2);
593 printf ("buffers are adjusted, %d, %d remain\n", l1, l2);
604 printf ("Trying resync %d,%d %d,%d\n", xc, xp, yc, yp);
607 i = min (l1-xc,l2-yp);
610 if (compare (l1, xc, l2, yp, i))
613 printf ("***** %s\n", f1);
614 dump (buffer1, 0, xc);
615 printf ("***** %s\n", f2);
616 dump (buffer2, 0, yp);
617 printf ("*****\n\n");
619 l1 = adjust (buffer1, l1, xc);
620 l2 = adjust (buffer2, l2, yp);
622 /* KLUDGE ALERT!! GOTO USED */
625 i = min (l1-xp, l2-yc);
628 if (compare (l1, xp, l2, yc, i))
631 printf ("***** %s\n", f1);
632 dump (buffer1, 0, xp);
633 printf ("***** %s\n", f2);
634 dump (buffer2, 0, yc);
635 printf ("*****\n\n");
637 l1 = adjust (buffer1, l1, xp);
638 l2 = adjust (buffer2, l2, yc);
640 /* KLUDGE ALERT!! GOTO USED */
668 if (l1 >= cLine || l2 >= cLine)
669 printf ("%s", ReSyncMes);
671 printf ("***** %s\n", f1);
672 dump (buffer1, 0, l1-1);
673 printf ("***** %s\n", f2);
674 dump (buffer2, 0, l2-1);
675 printf ("*****\n\n");
681 /* return number of lines read in */
682 xfill (pl, fh, ct, plnum)
692 printf ("xfill (%04x, %04x)\n", pl, fh);
696 while (ct-- && (*funcRead) (pl->text, MAXARG, fh) != NULL)
698 if (funcRead == (int (*) ())FNADDR(fgets))
699 pl->text[strlen(pl->text)-1] = 0;
700 if (fIgnore && !strcmps (pl->text, ""))
702 if (strlen (pl->text) != 0 || !fIgnore)
712 printf ("xfill returns %d\n", i);
719 /* adjust returns number of lines in buffer */
728 printf ("adjust (%04x, %d, %d) = ", pl, ml, lt);
730 printf ("%d\n", ml-lt);
738 printf ("move (%04x, %04x, %04x)\n", &pl[lt], &pl[0], sizeof (*pl)*(ml-lt));
741 Move ((unsigned char far *)&pl[lt], (char far *)&pl[0], sizeof (*pl)*(ml-lt));
747 * dump outputs a range of lines.
750 * pl pointer to current lineType structure
751 * start starting line number
752 * end ending line number
758 dump (pl, start, end)
762 if (fAbbrev && end-start > 2)
777 * pline prints a single line of output. If the /n flag
778 * has been specified, the line number of the printed text is added.
781 * pl pointer to current lineType structure
782 * fNumb TRUE if /n specified
789 printf ("%5d: ", pl->line);
791 printf ("%s\n", pl->text);
795 * strcmpi will compare two string lexically and return one of
797 * - 0 if the strings are equal
798 * - 1 if first > the second
799 * - (-1) if first < the second
801 * This was written to replace the run time library version of
802 * strcmpi which does not correctly compare the european character set.
803 * This version relies on a version of toupper which uses IToupper.
806 int strcmpi(str1, str2)
807 unsigned char *str1, *str2;
809 unsigned char c1, c2;
811 while ((c1 = toupper(*str1++)) == (c2 = toupper(*str2++))) {
823 /* compare two strings, ignoring white space, case is significant, return
824 * 0 if identical, <>0 otherwise
827 unsigned char *p1, *p2;
845 /* compare two strings, ignoring white space, case is not significant, return
846 * 0 if identical, <>0 otherwise
848 int strcmpis (p1, p2)
849 unsigned char *p1, *p2;
856 if (toupper (*p1) == toupper (*p2))