english
version "1.0"
identify "@(#)differ.sts 1.2 96/02/25"

#: Copyright (c) 1995, 2002 by Wayne C. Gramlich.
#, All rights reserved.
#,
#, Permission to use, copy, modify, distribute, and sell this software
#, for any purpose is hereby granted without fee provided that the above
#, copyright notice and this permission are retained.  The author makes
#, no representations about the suitability of this software for any purpose.
#, It is provided "as is" without express or implied warranty.

module differ

#: This module implements algorithms for determining the differences
#, between sequences of objects.  This code is parameterized so that
#, it can work on any arbitrary sequence of objects (e.g. vector[item],
#, string, etc.)

define align_type			#: Alignment type
    enumeration
	differ				#: Alignment is different
	same				#: Alignemnt is the same
	smashed				#: Alignment is smashed
    generate equal, print

define align[item]			#: One set of alignments
    record
	differ_table differ_table[item]	#: Parent {differ_table}
	sequence1 sequence[item]	#: First sequence
	sequence2 sequence[item]	#: Second sequence
	type align_type			#: Alignment type
    generate address_get, allocate, erase, identical

define aligns[item]			#: Sequence of {align} objects
    record
	differ_table differ_table[item]	#: Parent {differ_table}
	list vector[align[item]]	#: List of {align} objects
    generate address_get, allocate, erase, identical

define differ_table[item]		#: Some tables for computing diffs
    record
	debug_stream out_stream		#: Debugging stream
	aligns aligns[item]		#: Temp. align list
	aligns_free aligns[item]	#: Free diff. list
	matches vector[match[item]]	#: Temp. match list
	matches_free vector[match[item]] #: Free match list
	match_table table[item, match[item]] #: Match table for unique lines
	table set[sequence[item]]	#: Table for lookups
	sequences vector[sequence[item]] #: Temp. sequences list
	sequences_free vector[sequence[item]] #: Free sequences list
    generate allocate, erase, identical, print

define match[item]			#: On set of matches
    record
	differ_table differ_table[item]	#: Parent {differ_table}
	item item			#: Matching item
	index1 unsigned			#: First matching index
	index2 unsigned			#: Second matching index
    generate address_get, allocate, erase, identical

define sequence[item]			#: A sequence of {item} objects
    record
	differ_table differ_table[item]	#: Parent {differ_table}
	items vector[item]		#: Object sequence is part of
	items_hash unsigned		#: Hash value for {items}
	start unsigned			#: Start offset of sequence
	length unsigned			#: Number of objects in sequence
    generate address_get, allocate, erase, identical



#: {align} procedures:

procedure deallocate@align[item]
    takes
	align align[item]
    returns_nothing

    #: This procedure will deallocate {sequence}.


procedure greater_than@align[item]
    takes
	align1 align[item]
	align2 align[item]
    returns logical

    #: This procedure return {true} if {align1} is greater than {align2}
    #, with no overlap or {false} otherwise.


procedure is_smashed@align[item]
    takes
	align align[item]
    returns logical

    #: This procedure returns {true} if {align} is "smashed" and {false}
    #, otherwise.


procedure less_than@align[item]
    takes
	align1 align[item]
	align2 align[item]
    returns logical

    #: This procedure return {true} if {align1} is less than {align2}
    #, with no overlap or {false} otherwise.


procedure print@align[item]
    takes
	align align[item]
	out_stream out_stream
    returns_nothing


procedure smash@align[item]
    takes
	align align[item]
    returns_nothing

    #: This procedure will mark {align} as an empty sequence.


#: {aligns} procedures:

procedure append@aligns[item]
    takes
	aligns aligns[item]
	align align[item]
    returns_nothing

    #: This procedure will append {align} to {aligns}.


procedure create@aligns[item]
    takes
	differ_table differ_table[item]
    returns aligns[item]

    #: This procedure will create and return a new {aligns} object
    #, containing {differ_table}.


procedure cull@aligns[item]
    takes
	aligns aligns[item]
    returns_nothing

    #: This procedure will remove any {align} objects from {aligns}
    #, that are "smashed".


procedure differ_insert@aligns[item]
    takes
	aligns aligns[item]
	items1 vector[item]
	items2 vector[item]
    returns_nothing

    #: This procedure will add all of the differing {align} objects
    #, {aligns}.


procedure fetch1@aligns[item]
    takes
	aligns aligns[item]
	index unsigned
    returns align[item]

    #: This procedure will return the {index}'th {align} object from {aligns}.


procedure order@aligns[item]
    takes
	aligns aligns[item]
	start unsigned
	length unsigned
    returns_nothing

    #: This procedure will order {aligns} so that all {align} objects
    #, between {start} and {start} + {length} - 1 are ordered before
    #, or after the largest matching region.


procedure pop@aligns[item]
    takes
	aligns aligns[item]
    returns align[item]

    #: This procedure will return the {align} object at the end of {aligns}
    #, and reduce the size of {aligns} by one.


procedure print@aligns[item]
    takes
	aligns aligns[item]
	out_stream out_stream
    returns_nothing

    #: This procedure will print {aligns} to {out_stream}.


procedure size_get@aligns[item]
    takes
	aligns aligns[item]
    returns unsigned

    #: This procedure will return the size of {aligns}.


procedure store1@aligns[item]
    takes
	aligns aligns[item]
	index unsigned
	align align[item]
    returns_nothing

    #: This procedure will store {align} into {aligns}.


procedure truncate@aligns[item]
    takes
	aligns aligns[item]
	new_size unsigned
    returns_nothing

    #: This procedure will trucate {aligns} to only have {new_size}
    #, {align} objects.


procedure verify@aligns[item]
    takes
	aligns aligns[item]
	label string
    returns_nothing

    #: This procedure will verify that {aligns} is well ordered.


#: {differ_table} procedures:

procedure align_allocate@differ_table[item]
    takes
	differ_table differ_table[item]
	type align_type
	items1 vector[item]
	start1 unsigned
	length1 unsigned
	items2 vector[item]
	start2 unsigned
	length2 unsigned
    returns align[item]

    #: This procedure will allocate and return a new {align} object
    #, containing {items1}, {start1}, {length2}, {items2},
    #, {start2}, and {length2}.


procedure align_deallocate@differ_table[item]
    takes
	differ_table differ_table[item]
	align align[item]
    returns_nothing

    #: This procedure will deallocate {align} for {differ_table}.


procedure aligns_find@differ_table[item]
    takes
	differ_table differ_table[item]
	items1 vector[item]
	items2 vector[item]
    returns aligns[item]
    needs
	procedure equal@item
	    takes item, item
	    returns logical
	procedure hash@item
	    takes item
	    returns unsigned
	procedure identical@item
	    takes item, item
	    returns logical

    #: This procedure will return all of the alignments between
    #, {items1} and {items2} using {differ_table} to hold all of
    #, the intermediate state:


procedure aligns_match@differ_table[item]
    takes
	differ_table differ_table[item]
	items1 vector[item]
	items2 vector[item]
    returns aligns[item]
    needs
	procedure equal@item
	    takes item, item
	    returns logical
	procedure hash@item
	    takes item
	    returns unsigned
	procedure identical@item
	    takes item, item
	    returns logical

    #: This procedure will identify and return the initial matching
    #, {align} objects for each matching span in {items1} and {itmes2}
    #, that contains a single uniquely matching {item}.

    #, is to find items that occur exactly once in {items1} and
    #, {items2}.  These items are high probability match ups.
    #, The second step is to use a heavier weight alogrithm to
    #, find matches in the remaining unmatched sections.


procedure create@differ_table[item]
    takes_nothing
    returns differ_table[item]

    #: This procedure will create and return a new {differ_table} object.


procedure match_allocate@differ_table[item]
    takes
	differ_table differ_table[item]
    returns match[item]

    #: This procedure will allocate and return a {match} object from
    #, {differ_table}.


procedure match_deallocate@differ_table[item]
    takes
	differ_table differ_table[item]
	match match[item]
    returns_nothing

    #: This procedure will deallocate {match} into {differ_table}.


procedure match_expand@differ_table[item]
    takes
	differ_table differ_table[item]
	match match[item]
	items1 vector[item]
	items2 vector[item]
    returns align[item]
    needs
	procedure equal@item
	    takes item, item
	    returns logical
	procedure hash@item
	    takes item
	    returns unsigned

    #: This procedure will expand {match} until is spans as many
    #, matching items as possible.

    #: Look for previous matching items:

procedure sequence_allocate@differ_table[item]
    takes
	differ_table differ_table[item]
	items vector[item]
	start unsigned
	length unsigned
    returns sequence[item]

    #: This procedure will allocate and return an uninitialized
    #, {sequence} object from {differ_table}.


procedure sequence_deallocate@differ_table[item]
    takes
	differ_table differ_table[item]
	sequence sequence[item]
    returns_nothing

    #: This procedure will deallocate {sequence} into {differ_table}.


#: {match} procedures:

procedure deallocate@match[item]
    takes
	match match[item]
    returns_nothing

    #: This procedure will deallocate {match}.


procedure print@match[item]
    takes
	match match[item]
	out_stream out_stream
    returns_nothing


procedure smash@match[item]
    takes
	match match[item]
    returns_nothing

    #: This procedure will set the contents of {match} so that it
    #, no longer indicates a match.


#: {sequence} procedures:

procedure deallocate@sequence[item]
    takes
	sequence sequence[item]
    returns_nothing

    #: This procedure will deallocate {sequence}.


procedure equal@sequence[item]
    takes
	sequence1 sequence[item]
	sequence2 sequence[item]
    returns logical
    needs
	procedure equal@item
	    takes item, item
	    returns logical

    #: This procedure will return {true} if {sequence1} equals {sequence2}
    #, and {false} otherwise.


procedure greater_than@sequence[item]
    takes
	sequence1 sequence[item]
	sequence2 sequence[item]
    returns logical

    #: This procedure will return {true} if {sequence1} is greater than
    #, {sequence2} with no overlap; otherwise, {false} is returned.


procedure hash@sequence[item]
    takes
	sequence sequence[item]
    returns unsigned
    needs
	procedure hash@item
	    takes item
	    returns unsigned

    #: This procedure will return a hash value for {sequence}.


procedure less_than@sequence[item]
    takes
	sequence1 sequence[item]
	sequence2 sequence[item]
    returns logical

    #: This procedure will return {true} if {sequence1} is less than
    #, {sequence2} with no overlap; otherwise, {false} is returned.


procedure print@sequence[item]
    takes
	sequence sequence[item]
	out_stream out_stream
    returns_nothing
    needs
	procedure print@item
	    takes item, out_stream
	    returns_nothing

    #: This procedure outputs {sequence} to {out_stream}.


procedure show_prefixed@sequence[item]
    takes
	sequence sequence[item]
	prefix string
	out_stream out_stream
    returns_nothing
    needs
	procedure show_prefixed@item
	    takes item, string, out_stream
	    returns_nothing

    #: This procedure will output {sequence} to {out_stream} with each
    #, {item} prefixed with {prefix}.


procedure smash@sequence[item]
    takes
	sequence sequence[item]
    returns_nothing

    #: This procedure will convert {sequence} into a zero length sequence.