english
version "1.0"
identify "xyz"

#: Copyright (c) 1991-2005 by Wayne C. Gramlich.
#, All rights reserved.

module string

#: The string module implements variable length mutable string objects.

#: In STIPPLE, strings have the following characteristics:
#,    -	Strings are a sequence of zero, one, or more 32-bit characters.
#,    -	The NULL character (i.e. "\0") is not treated specially; it is
#,	it is a 32-bit character just like all other 32-bit characters.
#,	(This is unlike ANSI-C, where NULL is typcially used as a
#,	string terminator.)
#,    -	Strings are mutable in that both their contents, length, and mode
#,	may vary over time.
#,    -	Strings are in one of three modes -- {permanent_read_only},
#,	{temporary_read_only}, or {read_write}.  When a string is in the
#,	one of the two read-only modes, any string operations that attempt
#,	to modify the string contents or length will fail by signaling
#,	{read_only_string}.  While a string's mode can be switched between
#,	{temporary_read_only} and {read_write} an arbitrary number of times,
#,	once a string is in {permanent_read_only} mode, it can not be
#,	taken out that mode.
#,    -	The compiler stores all string literals in a read-only segment
#,	with a mode of {permanent_read_only}.
#,
#, The standard implementation of strings in STIPPLE have the following
#, additional characteristics:
#,    -	Strings are optimized to efficiently store strings that only
#,	consist of either entirely 8-bit or entirely 16-bit characters.
#,	characters.
#,    -	Internally, strings are terminated with a NULL character to simplify
#,	interfacing to standard ANSI-C library facilities.
#,    -	Relatively efficient sub-string insertion and deletion is supported.
#,	While sub-string insertion and deletion in same the region of a
#,	string will be efficient (i.e. locality of insertion and deletion),
#,	random sub-string insertion and deletion may not be efficient at all.
#,    -	Strings that are created in the {permanent_read_only} mode will
#,	use less storage than the other modes.

define string
    external

define string_mode
    enumeration
	permanent_read_only
	temporary_read_only
	read_write
    generate equal, hash, print, unsigned_convert

define string_data
    record
	back_slash character
	buffer string
	double_quote character
	initialized logical
	lower_b character
	lower_c character
	lower_d character
	lower_k character
	lower_l character
	lower_p character
	lower_q character
	lower_s character
	lower_w character
	new_line character
	percent character
	single_quote character
	space character
	tab character
	upper_c character
	upper_s character
	zero_offset unsigned
    generate allocate, erase, print



procedure address_get@string
    takes
	string string
    returns address
    external string__address_get

    #: This procedure returns the address associated with {string}.



procedure allocate@string
    takes_nothing
    returns string
    external string__create

    #: This procedure will create and return an empty string.
    #, The returned string will be in mode {read_write}.



procedure buffer_append@string
    takes
	text string
	buffer string
    returns_nothing

    #: This procedure will append {text} to {string}.  It has the
    #, same functionality as {string_append}@{string}, only with
    #, transposed arguments.


procedure buffer_fetch@string
    takes
	string string
	index unsigned
    returns unsigned

    #: This procedure will return the {index}'th word from {string}'s buffer.


procedure buffer_size_get@string
    takes
	string string
    returns unsigned

    #: This procedure will return the buffer size associated with
    #, {string}.  This procedure is for use by {save@buffer} only.


procedure capitalize@string
    takes
	string string
    returns_nothing

    #: This procedure will convert the first character in {string}
    #, to upper case.


procedure character_append@string
    takes
	string string
	character character
    returns_nothing

    #: This procedure will append {character} to the end of {string}.


procedure character_delete@string
    takes
	string string
	offset unsigned
    returns_nothing

    #: This procedure will delete the character at {offset} in {string}.


procedure character_insert@string
    takes
	string string
	offset unsigned
	character character
    returns_nothing


procedure character_gap_insert@string
    takes
	string string
	character character
    returns_nothing

    #: This procedure will insert {character} into in {string}
    #, at the gap offset.
    

procedure character_lop@string
    takes
	string string
    returns character

    #: This procedure will remove and return the first character from {string}.


procedure character_search@string
    takes
	string string
	character character
    returns unsigned

    #: This procedure will searh {string} from the beginning for the first
    #, occurrance of {character} and return the index.  If {character} is
    #, not found, 0xffffffff is returned.


procedure compare@string
    takes
	string1 string
	string2 string
    returns integer


procedure concat@string
    takes
	left_string string
	right_string string
    returns string

    #: This routine will return a new string that consists of the
    #, concatenation of {left_string} and {right_string}.


procedure concat3@string
    takes
	string1 string
	string2 string
	string3 string
    returns string

    #: This routine will return a new string that consists of the
    #, concatenation of {string1}, {string2}, and {string3}.


procedure copy@string
    takes
	string string
    returns string

    #: This procedure will return a copy of {string}.
    #, The returned string will have the same mode as {string}.


procedure copy_mode@string
    takes
	string string
	mode string_mode
    returns string

    #: This procedure will return a copy of {string}.
    #, The returned string will be in mode {mode}.


procedure copy_read_only@string
    takes
	string string
    returns string

    #: This procedure will return a permanent read-only copy of {string}.


procedure create@string
    takes_nothing
    returns string

    #: This procedure will create and return a empty read/write {string}.
    

procedure xcreate_initialized@string
    takes
	character character
	size unsigned
    returns string

    #: This procedure will create and return a string of {size} characters.
    #, Each character in the returned string is  initialized to {character}.
    #, The returned string will be in mode {read_write}.

    #return create_initialized@string(character, size)



procedure create_initialized@string
    takes
	character character
	size unsigned
    returns string

    #: This procedure will create and return a string of {size} characters.
    #, Each character in the returned string is initialized to {character}.
    #, The returned string will be in mode {read_write}.


procedure delete@string
    takes
	string string
	offset unsigned
	length unsigned
    returns_nothing

    #: This procedure will the characters from {offset} through {offset} +
    #, {length} - 1 (inclusive) in {string}.


procedure equal@string
    takes
	string1 string
	string2 string
    returns logical

    #: This procedure will return {true}@{logical} if {string1} is equal
    #, to {string2} and {false}@{logical} otherwise.


procedure float_append@string
    takes
	string string
	number float
    returns float

    #: This procedure will convert {number} into a floating point number
    #, and append it to {string).


procedure float_convert@string
    takes
	arg string
    returns float
    external string__float_convert

    #: This procedure will convert {arg} from a decimal floating point
    #, number and return it as a {float}.



procedure form@string
    takes
	text string
	buffer string
    returns_nothing


procedure form_begin@string
    takes
	buffer string
    returns unsigned


procedure form_end@string
    takes
	buffer string
	anchor unsigned
    returns_nothing


procedure form_prepare@string
    takes
	buffer string
	form string
    returns_nothing


procedure format@string
    takes
	string string
	out_stream out_stream
	format string
	offset unsigned
    returns_nothing

    #: This routine will output {string} to {out_stream} using the formatting
    #, characters in {format} starting at {offset} until a terminating "%" is
    #, encountered.  See the {format} module find out more about formatted
    #, output.
    #,
    #, The accepted formats are:
    #,	 b	Enclose a string in balanced single quotes (i.e. `...')
    #,   c	Center the string.
    #,	 C	Output a C-style string.
    #,	 d	Enclose the string in double quotes.
    #,   k	C keywords are mangled by adding "___k" to the end
    #,   l	Left justify the string.
    #,   p<pad>	Set padding character to <pad>.
    #,	 q	Enclose the string in single quotes.
    #,	 s	Output a raw string.
    #,	 S	Output a STIPPLE-style string.
    #,   w<width> Output a minimim of <width> characters.

    #, Scan the format characters:

procedure fetch1@string
    takes
	string string
	index unsigned
    returns character
    external string__fetch1

    #: This procedure will fetch the {index}'th character from {string}.
    #, {Bounds} is signaled if there is no {index}'th character.



procedure front_size_get@string
    takes
	string string
    returns unsigned
    external string__front_size_get



procedure gap_offset_get@string
    takes
	string string
    returns unsigned


    #: This procedure will return the gap offset of {string}.


procedure gap_offset_set@string
    takes
	string string
	new_gap_offset unsigned
    returns_nothing

    #: This procedure will set the gap offset of {string} to {new_gap_offset}.


procedure gap_set@string
    takes
	string string
	new_gap_offset unsigned
    returns_nothing
    external string__gap_set


procedure greater_than@string
    takes
	string1 string
	string2 string
    returns logical

    #: This procedure will return {logical@true} if {string1} is lexically
    #, greater than {string2}; otherwise, {logical@false} is returned.


procedure hash@string
    takes
	string string
    returns unsigned

    #: This procedure will return a hash value that depends upon every
    #, character in {string}.


procedure header_fetch@string
    takes
	string string
	index unsigned
    returns unsigned

    #: This procedure will return the {index}'th word of the {string} header.
    #, This procedure is strictly for the use {save@string}.


procedure identical@string
    takes
	string1 string
	string2 string
    returns logical

    #: This procedure will return {true} if {string1} and {string2}
    #, both refer to the same identical string and {false} otherwise.


procedure input@string
    takes
	in_stream in_stream
    returns string

    #: This procedure will read a string from {in_stream} and return it.


procedure integer_convert@string
    takes
	string string
    returns integer

    #: This procedure will convert {string} to a signed integer.


procedure integer_decimal_append@string
    takes
	buffer string
	integer integer
    returns_nothing

    #: This procedure will append {integer} to {buffer} as an decimal number.
    #, If {integer} is negative it will be preceeded by a "-" sign.


procedure is_binary@string
    takes
	string string
    returns logical

    #: This procedure will return {true} if {string} looks like it
    #, contains binary data rather than textual data.  The algorithm
    #, is heuristic although it should work in the presense of Latin-1
    #, characters.


procedure is_buffered@string
    takes
	string string
    returns logical
    external string__is_buffered


procedure is_buffered_get@string
    takes
	string string
    returns logical

    #: This procedure will return {true} if {string} has a buffer and
    #, {false} otherwise.  This procedure is present for {save@string} only.


procedure is_c_keyword@string
    takes
	string string
    returns logical

    #: This procedure will return {true} if {string} is a C keyword.
    #, In fact it adds in a few keywords from GCC as well.


procedure less_than@string
    takes
	string1 string
	string2 string
    returns logical

    #: This procedure will return {logical@true} if {string1} is lexically
    #, less than {string2}; otherwise, {logical@false} is returned.


procedure logical_convert@string
    takes
	string string
    returns logical

    #: This procedure will convert {string} into a {logical} value.
    #, A string value of "1" returns {true} and everything else a {false}.


procedure mode_get@string
    takes
	string string
    returns string_mode

    #: This procedure will return the string mode for {string}.


procedure mode_set@string
    takes
	string string
	mode string_mode
    returns_nothing

    #: This procedure will set the mode for {string} to {mode}.
    #, {permanent_read_only_string} is signaled if the mode of {string}
    #, is {permanent_read_only} and {mode} is not.


procedure output@string
    takes
	string string
	out_stream out_stream
    returns_nothing

    #: This procedure will output {string} to {out_stream}.


procedure parse@string
    takes
	in_stream in_stream
    returns string

    #: This procedure will parse a string from {in_stream} and return it.


procedure prefix_match@string
    takes
	string string
	prefix string
    returns logical

    #: This procedure will return {true}@{logical} if {prefix}
    #, matches the beginning of {string} and {false}@{logical}
    #, otherwise.


procedure prefix_match_no_case@string
    takes
	string string
	prefix string
    returns logical

    #: This procedure will return {true}@{logical} if {prefix}
    #, matches the beginning of {string} insensitive to case and
    #, {false}@{logical} otherwise.


procedure print@string
    takes
	string string
	out_stream out_stream
    returns_nothing

    #: This procedure will print {string} to {out_stream} in human readable
    #, form.

    # Should unlock string here!



procedure put@string
    takes
	string string
	out_stream out_stream
    returns_nothing

    #: This procedure will output {string} to {out_stream} one character
    #, at a time without doing any conversion on the characters.


procedure show@string
    takes
	string string
    returns_nothing


procedure xrange_extract@string
    takes
	string string
	offset unsigned
	length unsigned
	mode string_mode
    returns string

    #: This procedure will return the {length} characters from {string}
    #, starting at {offset} as a string of string mode {mode}.


procedure range_compare@string
    takes
	string1 string
	offset1 unsigned
	count1 unsigned
	string2 string
	offset2 unsigned
	count2 unsigned
    returns integer


procedure range_copy@string
    takes
	to_string string
	to_offset unsigned
	from_string string
	from_offset unsigned
	count unsigned
    returns_nothing


procedure range_delete@string
    takes
	string string
	offset unsigned
	count unsigned
    returns_nothing
    external string__range_delete

    # This routine will delete the {count} characters starting at
    # {offset} in {string}.

    

procedure range_extract@string
    takes
	string string
	offset unsigned
	length unsigned
	mode string_mode
    returns string

    #: This procedure will return the {length} characters from {string}
    #, starting at {offset} as a string of string mode {mode}.


procedure range_nulls_insert@string
    takes
	string string
	offset unsigned
	count unsigned
    returns_nothing
    external string__range_nulls_insert

    # This routine will insert {count} null characters ('\0\') into
    # {string} at {offset}.


procedure read@string
    takes
	in_stream in_stream
    returns string

    #: This procedure will read in a string from {in_stream} and return it.
    #, The returned string will be in {read_write} mode.


procedure read_only_copy@string
    takes
	string string
    returns string
    external string__read_only_copy

    #: This procedure will return a read only copy of {string}.



procedure save@string
    takes
	string string
	save save
	offset unsigned
    returns_nothing

    #: This procedure will save {string} into {save} at {offset}.


#    size :@= string.size
#    if string.is_buffered
#	header_offset :@= read_write_lookup@(save, string.address)
#	if header_offset = 0
#	    header_offset :@= read_write_allocate@(save, string.address, 2)
#	    save[header_offset + 0] :=
#	      characters@(save, header_fetch@(string, 0))
#	    size :@= string.buffer_size
#	    buffer_offset :@= read_write_allocate@(save, save.zero, size)
#	    save[header_offset + 1] := read_write@(save, buffer_offset)
#	    index :@= 0
#	    loop
#		while index < size
#	    	save[buffer_offset + index] :=
#		  characters@(save, buffer_fetch@(string, index))
#		index :+= 1
#	save[offset] := read_write@(save, header_offset)
#    else
#	words :@= (size + 2 + 3) / 4
#	header_offset :@= read_only_lookup@(save, string.address)
#	if header_offset = 0
#	    header_offset := read_only_allocate@(save, string.address, words)
#	    if header_offset != 0
#		index :@= 0
#		loop
#		    while index < words
#		    read_only_store@(save, header_offset + index,
#		      characters@(save, header_fetch@(string, index)))
#		    index :+= 1
#	save[offset] := read_only@(save, header_offset)


procedure space_pad@string
    takes
	string string
	target_size unsigned
    returns_nothing

    #: This procedure will append spaces to {string} until it
    #, at least {target_size} in size.


procedure size_get@string
    takes
	string string
    returns unsigned
    external string__size

    #: This procedure will return the size of {string}.
    


procedure store1@string
    takes
	string string
	index unsigned
	character character
    returns_nothing
    external string__store1

    #: This procedure will store {character} into the {index}'th location
    #, in {string}.  {Bounds} is signalled if there is no {index}'th
    #, location.



procedure string_append@string
    takes
	to_string string
	from_string string
    returns_nothing


procedure string_insert@string
    takes
	to_string string
	offset unsigned
	from_string string
    returns_nothing


procedure string_gap_insert@string
    takes
	to_string string
	from_string string
    returns_nothing

    #: This procedure will insert {from_string} into in {to_string}
    #, at the gap offset.
    

procedure sub_string_append@string
    takes
	target_string string
	source_string string
	source_offset unsigned
	source_length unsigned
    returns_nothing

    #: This procedure will append the {source_length} characters from
    #, {source_string} starting at {source_offset} to the end of
    #, {target_string}.

    #sub_string_insert@(target_string,
    #  target_string.size, source_string, source_offset, source_length)



procedure sub_string_compare@string
    takes
	left_string string
	left_offset unsigned
	left_length unsigned
	right_string string
	right_offset unsigned
	right_length unsigned
    returns integer

    #: This procedure will return -1, 0, or 1 depending upon whether
    #, {left_string} from {left_offset} for {left_length} characters
    #, is lexically less than, equal to, or greater than {right_string}
    #, from {right_offset} for {right_length} characters.


procedure sub_string_equal@string
    takes
	left_string string
	left_offset unsigned
	left_length unsigned
	right_string string
	right_offset unsigned
	right_length unsigned
    returns logical

    #: This procedure will return {true} if {left_string} from {left_offset}
    #, for {left_length} characters equal to {right_string} from
    #, {right_offset} for {right_length} characters.


procedure sub_string_insert@string
    takes
	to_string string
	to_offset unsigned
	from_string string
	from_offset unsigned
	from_length unsigned
    returns_nothing

    #: This procedure will insert the {from_length} characters starting at
    #, {from_offset} in {from_string} to {to_offset} in {to_string} moving
    #, the appropriate characters in {to_string} over by {from_length}
    #, characters.


procedure xstring_lop@string
    takes
	string string
    returns string
    
    
    #: This procedure will lop a string off of {string} and return it.


procedure string_lop@string
    takes
	string string
    returns string
    
    #: This procedure will lop a string off of {string} and return it.


procedure suffix_match@string
    takes
	string string
	suffix string
    returns logical

    #: This procedure will return {true}@{logical} if {suffix}
    #, matches the end of {string} and {false}@{logical} otherwise.


procedure xsymbol_lop@string
    takes
	string string
	mode string_mode
    returns string

    #: This procedure will skip over any white space in {string} and
    #, take the next sequence on non-white space characters and return
    #, them as a string of type {mode}.  The all characters up to
    #, and including the returned symbol are removed from the front of
    #, {string}.  If symbol is encountered, an empty string is returned
    #, and {string} is not modified.


procedure symbol_lop@string
    takes
	string string
	symbol string
    returns string

    #: This procedure will skip over any white space in {string} and
    #, take the next sequence on non-white space characters and return
    #, them as a string.  If {symbol} is ??@{string} a new mutable
    #, {string} is allocated; otherwse the symbol is appended to {symbol}.
    #, All of the characters up to and including the returned symbol
    #, are removed from the front of {string}.  If no symbol is encountered,
    #, an empty string is returned and {string} neither {string} nor {symbol}
    #, are modified.


procedure to_lower@string
    takes
	string string
    returns_nothing

    #: This procedure will convert each upper case character in {string} to
    #, lower case.


procedure to_upper@string
    takes
	string string
    returns_nothing

    #: This procedure will convert each lower case character in {string} to
    #, upper case.


procedure trim@string
    takes
	string string
	new_size unsigned
    returns_nothing

    #: This procedure will ensure that {string} is no longer than {new_size}.


procedure unsigned_append@string
    takes
	buffer string
	number unsigned
	radix unsigned
	prefix string
	suffix string
	width unsigned
	left_pad string
	right_pad string
	upper_case logical
    returns_nothing

    #: This procedure will append {number} to {buffer} as a base {radix}
    #, number.  The number is prefixed by {prefix} and suffixed by
    #, {suffix}.  A minimum of {width} characters is output.  If more
    #, than {width} characters are needed to output the number, the
    #, entire number including {prefix} and {suffix} will output;
    #, no truncation occurs.  {left_pad} and {right_pad} are used
    #, to pad the number on the left and right, respectively, when
    #, additional characters are needed to make the number {width}
    #, characters wide.  It is a fatal error if {width} is non-zero,
    #, and both {left_pad} and {right_pad} are empty.  If {upper_case}
    #, is {true}, any digits output for a radix greater than 10 (decimal),
    #, will be upper case; otherwise, lower case digits are output.
    #, This routine is *not* optimized for speed.


procedure unsigned_append_helper@string
    takes
	buffer string
	number unsigned
	radix unsigned
	upper_case logical
    returns_nothing

    #: This procedure will append {number} to {buffer} as a {radix} number.
    #, If {upper_case} is {true}, letter digits are in upper-case characters;
    #, otherwise, lower-case characters are used.


procedure unsigned_decimal_append@string
    takes
	buffer string
	number unsigned
    returns_nothing

    #: This procedure will append {number} to {buffer} as an unsigned
    #, decimal number.


procedure unsigned_hexadecimal_append@string
    takes
	buffer string
	number unsigned
    returns_nothing

    #: This procedure will append {number} to {buffer} as an unsigned
    #, hexadecimal number.


procedure unsigned_convert@string
    takes
	string string
    returns unsigned

    #: This procedure will treat string as an unsigned decimal number
    #, and return the corresponding value.


procedure unsigned_lop@string
    takes
	string string
    returns unsigned

    #: This procedure will treat will skip over any preceeding white_space
    #, in {string} and convert the next sequence of digits into a number
    #, and return it.  The white space and digits are removed from the
    #, front of {string}.  If no digits are encountered, 0 is returned
    #, an the contents of {string} are not effected.


procedure white_space_lop@string
    takes
	string string
    returns_nothing

    #: This procedure will remove any white_space from the begining of
    #, {string} if any is present.


procedure writable_copy@string
    takes
	string string
    returns string

    #: This procedure will return a writable copy of {string}.


procedure write@string
    takes
	string string
	out_stream out_stream
    returns_nothing

    #: This procedure will output {string} to {out_stream} so that
    #, it can be subsequently read using {read}@{string}().