2019-02-05 02:44:07 +02:00
/* ------------------------------------------------------------------------- */
/* "text" : Text translation, the abbreviations optimiser, the dictionary */
/* */
2024-02-27 00:51:15 +02:00
/* Part of Inform 6.43 */
2024-02-12 01:08:33 +02:00
/* copyright (c) Graham Nelson 1993 - 2024 */
2019-02-05 02:44:07 +02:00
/* */
/* ------------------------------------------------------------------------- */
# include "header.h"
2022-03-03 02:10:25 +02:00
uchar *low_strings;                    /* Allocated to low_strings_top       */
int32 low_strings_top;
static memory_list low_strings_memlist;

int32 static_strings_extent;           /* Number of bytes of static strings
                                          made so far                        */
uchar *static_strings_area;            /* Used to hold the static strings
                                          area so far.
                                          Allocated to static_strings_extent */
memory_list static_strings_area_memlist;

static char *all_text;                 /* Text buffer holding the entire text
                                          of the game, when it is being
                                          recorded.
                                          (Allocated to all_text_top)        */
static memory_list all_text_memlist;
static int32 all_text_top;

int abbrevs_lookup_table_made;         /* The abbreviations lookup table is
                                          constructed when the first non-
                                          abbreviation string is translated:
                                          this flag is TRUE after that       */
int abbrevs_lookup[256];               /* Once this has been constructed,
                                          abbrevs_lookup[n] = the smallest
                                          number of any abbreviation beginning
                                          with ASCII character n, or -1
                                          if none of the abbreviations do    */
int no_abbreviations;                  /* No of abbreviations defined so far */
/* ------------------------------------------------------------------------- */
/* Glulx string compression storage */
/* ------------------------------------------------------------------------- */
int no_strings;                        /* No of strings in static strings
                                          area.                              */
int no_dynamic_strings;                /* No. of @.. string escapes used
                                          (actually, the highest value used
                                          plus one)                          */
int no_unicode_chars;                  /* Number of distinct Unicode chars
                                          used. (Beyond 0xFF.)               */

huffentity_t *huff_entities;           /* The list of entities (characters,
                                          abbreviations, @.. escapes, and
                                          the terminator)                    */
static huffentity_t **hufflist;        /* Copy of the list, for sorting      */

int no_huff_entities;                  /* The number of entities in the list */
int huff_unicode_start;                /* Position in the list where Unicode
                                          chars begin.                       */
int huff_abbrev_start;                 /* Position in the list where string
                                          abbreviations begin.               */
int huff_dynam_start;                  /* Position in the list where @..
                                          entities begin.                    */
int huff_entity_root;                  /* The position in the list of the root
                                          entry (when considering the table
                                          as a tree).                        */

int done_compression;                  /* Has the game text been compressed? */
int32 compression_table_size;          /* Length of the Huffman table, in
                                          bytes                              */
int32 compression_string_size;         /* Length of the compressed string
                                          data, in bytes                     */
int32 *compressed_offsets;             /* The beginning of every string in
                                          the game, relative to the beginning
                                          of the Huffman table. (So entry 0
                                          is equal to compression_table_size).
                                          Allocated to no_strings at
                                          compress_game_text() time.         */
static memory_list compressed_offsets_memlist;

unicode_usage_t *unicode_usage_entries; /* Allocated to no_unicode_chars     */
static memory_list unicode_usage_entries_memlist;

#define UNICODE_HASH_BUCKETS (64)
static int unicode_usage_hash[UNICODE_HASH_BUCKETS];

static int unicode_entity_index(int32 unicode);
/* ------------------------------------------------------------------------- */
/* Abbreviation arrays */
/* ------------------------------------------------------------------------- */
2022-03-03 02:10:25 +02:00
abbreviation * abbreviations ; /* Allocated up to no_abbreviations */
static memory_list abbreviations_memlist ;
2024-02-12 01:08:33 +02:00
/* Memory to hold the text of any abbreviation strings declared. */
static int32 abbreviations_totaltext ;
static char * abbreviations_text ; /* Allocated up to abbreviations_totaltext */
static memory_list abbreviations_text_memlist ;
2022-03-03 02:10:25 +02:00
static int * abbreviations_optimal_parse_schedule ;
static memory_list abbreviations_optimal_parse_schedule_memlist ;
static int * abbreviations_optimal_parse_scores ;
static memory_list abbreviations_optimal_parse_scores_memlist ;
2019-02-05 02:44:07 +02:00
/* ------------------------------------------------------------------------- */
int32 total_chars_trans , /* Number of ASCII chars of text in */
total_bytes_trans , /* Number of bytes of Z-code text out */
zchars_trans_in_last_string ; /* Number of Z-chars in last string:
needed only for abbrev efficiency
calculation in " directs.c " */
2022-03-03 02:10:25 +02:00
static int32 total_zchars_trans ; /* Number of Z-chars of text out
2019-02-05 02:44:07 +02:00
( only used to calculate the above ) */
static int zchars_out_buffer [ 3 ] , /* During text translation, a buffer of
3 Z - chars at a time : when it ' s full
these are written as a 2 - byte word */
zob_index ; /* Index (0 to 2) into it */
2022-03-03 02:10:25 +02:00
uchar * translated_text ; /* Area holding translated strings
2022-07-24 13:10:45 +03:00
until they are moved into the
2022-03-03 02:10:25 +02:00
static_strings_area below */
static memory_list translated_text_memlist ;
2024-02-12 01:08:33 +02:00
static char * temp_symbol ; /* Temporary symbol name used while
processing " @(...) " . */
static memory_list temp_symbol_memlist ;
2022-03-03 02:10:25 +02:00
static int32 text_out_pos ; /* The "program counter" during text
translation : the next position to
2019-02-05 02:44:07 +02:00
write Z - coded text output to */
2022-03-03 02:10:25 +02:00
static int32 text_out_limit ; /* The upper limit of text_out_pos
during text translation ( or - 1
for no limit ) */
2019-02-05 02:44:07 +02:00
static int text_out_overflow ; /* During text translation, becomes
2022-03-03 02:10:25 +02:00
true if text_out_pos tries to pass
2019-02-05 02:44:07 +02:00
text_out_limit */
/* ------------------------------------------------------------------------- */
/* For variables/arrays used by the dictionary manager, see below */
/* ------------------------------------------------------------------------- */
/* ------------------------------------------------------------------------- */
/* Prepare the abbreviations lookup table (used to speed up abbreviation */
/* detection in text translation). We first bubble-sort the abbrevs into */
/* alphabetical order (this is necessary for the detection algorithm to */
/* work). Since the table is only prepared once, and for a table */
/* of size at most 96, there's no point using an efficient sort algorithm. */
/* ------------------------------------------------------------------------- */
static void make_abbrevs_lookup ( void )
2024-02-12 01:08:33 +02:00
{ int bubble_sort , j , k ;
char * p1 , * p2 ;
2019-02-05 02:44:07 +02:00
do
{ bubble_sort = FALSE ;
for ( j = 0 ; j < no_abbreviations ; j + + )
for ( k = j + 1 ; k < no_abbreviations ; k + + )
2024-02-12 01:08:33 +02:00
{ p1 = abbreviation_text ( j ) ;
p2 = abbreviation_text ( k ) ;
2019-02-05 02:44:07 +02:00
if ( strcmp ( p1 , p2 ) < 0 )
2024-02-12 01:08:33 +02:00
{
abbreviation temp = abbreviations [ j ] ;
abbreviations [ j ] = abbreviations [ k ] ;
abbreviations [ k ] = temp ;
2019-02-05 02:44:07 +02:00
bubble_sort = TRUE ;
}
}
} while ( bubble_sort ) ;
for ( j = no_abbreviations - 1 ; j > = 0 ; j - - )
2024-02-12 01:08:33 +02:00
{ p1 = abbreviation_text ( j ) ;
2019-02-05 02:44:07 +02:00
abbrevs_lookup [ ( uchar ) p1 [ 0 ] ] = j ;
2022-03-03 02:10:25 +02:00
abbreviations [ j ] . freq = 0 ;
2019-02-05 02:44:07 +02:00
}
abbrevs_lookup_table_made = TRUE ;
}
/* ------------------------------------------------------------------------- */
/* Search the abbreviations lookup table (a routine which must be fast). */
/* The source text to compare is text[i], text[i+1], ... and this routine */
/* is only called if text[i] is indeed the first character of at least one */
/* abbreviation, "from" being the least index into the abbreviations table */
/* of an abbreviation for which text[i] is the first character. Recall */
/* that the abbrevs table is in alphabetical order. */
/* */
/* The return value is -1 if there is no match. If there is a match, the */
/* text to be abbreviated out is over-written by a string of null chars */
/* with "ASCII" value 1, and the abbreviation number is returned. */
/* */
/* In Glulx, we *do not* do this overwriting with 1's. */
/* ------------------------------------------------------------------------- */
static int try_abbreviations_from ( unsigned char * text , int i , int from )
{ int j , k ; uchar * p , c ;
c = text [ i ] ;
2024-02-12 01:08:33 +02:00
for ( j = from ;
j < no_abbreviations ;
j + + )
{
p = ( uchar * ) abbreviations_text + abbreviations [ j ] . textpos ;
if ( c ! = p [ 0 ] ) break ;
if ( text [ i + 1 ] = = p [ 1 ] )
2019-02-05 02:44:07 +02:00
{ for ( k = 2 ; p [ k ] ! = 0 ; k + + )
if ( text [ i + k ] ! = p [ k ] ) goto NotMatched ;
if ( ! glulx_mode ) {
for ( k = 0 ; p [ k ] ! = 0 ; k + + ) text [ i + k ] = 1 ;
}
2022-03-03 02:10:25 +02:00
abbreviations [ j ] . freq + + ;
2019-02-05 02:44:07 +02:00
return ( j ) ;
NotMatched : ;
}
}
return ( - 1 ) ;
}
2024-02-12 01:08:33 +02:00
/* Create an abbreviation. */
2019-02-05 02:44:07 +02:00
extern void make_abbreviation ( char * text )
{
2024-02-12 01:08:33 +02:00
int alen ;
int32 pos ;
2022-07-24 13:10:45 +03:00
/* If -e mode is off, we won't waste space creating an abbreviation entry. */
if ( ! economy_switch )
return ;
2024-02-12 01:08:33 +02:00
alen = strlen ( text ) ;
pos = abbreviations_totaltext ;
2022-07-24 13:10:45 +03:00
2022-03-03 02:10:25 +02:00
ensure_memory_list_available ( & abbreviations_memlist , no_abbreviations + 1 ) ;
2024-02-12 01:08:33 +02:00
ensure_memory_list_available ( & abbreviations_text_memlist , pos + alen + 1 ) ;
strcpy ( abbreviations_text + pos , text ) ;
abbreviations_totaltext + = ( alen + 1 ) ;
2019-02-05 02:44:07 +02:00
2024-02-12 01:08:33 +02:00
abbreviations [ no_abbreviations ] . textpos = pos ;
abbreviations [ no_abbreviations ] . textlen = alen ;
2022-03-03 02:10:25 +02:00
abbreviations [ no_abbreviations ] . value = compile_string ( text , STRCTX_ABBREV ) ;
abbreviations [ no_abbreviations ] . freq = 0 ;
2019-02-05 02:44:07 +02:00
/* The quality is the number of Z-chars saved by using this */
/* abbreviation: note that it takes 2 Z-chars to print it. */
2022-03-03 02:10:25 +02:00
abbreviations [ no_abbreviations ] . quality = zchars_trans_in_last_string - 2 ;
if ( abbreviations [ no_abbreviations ] . quality < = 0 ) {
warning_named ( " Abbreviation does not save any characters: " , text ) ;
}
no_abbreviations + + ;
2019-02-05 02:44:07 +02:00
}
2024-02-12 01:08:33 +02:00
/* Return a pointer to the (uncompressed) abbreviation text.
This should be treated as temporary ; it is only valid until the next
make_abbreviation ( ) call . */
extern char * abbreviation_text ( int num )
{
if ( num < 0 | | num > = no_abbreviations ) {
compiler_error ( " Invalid abbrev for abbreviation_text() " ) ;
return " " ;
}
return abbreviations_text + abbreviations [ num ] . textpos ;
}
2019-02-05 02:44:07 +02:00
/* ------------------------------------------------------------------------- */
2022-03-03 02:10:25 +02:00
/* The front end routine for text translation. */
/* strctx indicates the purpose of the string. This is mostly used for */
/* informational output (gametext.txt), but we treat some string contexts */
/* specially during compilation. */
2019-02-05 02:44:07 +02:00
/* ------------------------------------------------------------------------- */
2024-02-12 01:08:33 +02:00
/* TODO: When called from a print statement (parse_print()), it would be
nice to detect if the generated string is exactly one character . In that
case , we could return the character value and a flag to indicate the
caller could use @ print_char / @ streamchar / @ new_line / @ streamunichar
instead of printing a compiled string .
We ' d need a new STRCTX value or two to distinguish direct - printed strings
from referenceable strings .
Currently , parse_print ( ) checks for the " ^ " case manually , which is a
bit icky . */
2022-03-03 02:10:25 +02:00
extern int32 compile_string ( char * b , int strctx )
{ int32 i , j , k ;
uchar * c ;
2022-07-24 13:10:45 +03:00
int in_low_memory ;
if ( execution_never_reaches_here ) {
/* No need to put strings into gametext.txt or the static/low
strings areas . */
if ( strctx = = STRCTX_GAME | | strctx = = STRCTX_GAMEOPC | | strctx = = STRCTX_LOWSTRING | | strctx = = STRCTX_INFIX ) {
/* VENEER and VENEEROPC are only used at the translate_text level,
so we don ' t have to catch them here . */
return 0 ;
}
}
2022-03-03 02:10:25 +02:00
/* In Z-code, abbreviations go in the low memory pool (0x100). So
do strings explicitly defined with the Lowstring directive .
( In Glulx , the in_low_memory flag is ignored . ) */
2022-07-24 13:10:45 +03:00
in_low_memory = ( strctx = = STRCTX_ABBREV | | strctx = = STRCTX_LOWSTRING ) ;
2019-02-05 02:44:07 +02:00
if ( ! glulx_mode & & in_low_memory )
2022-03-03 02:10:25 +02:00
{
k = translate_text ( - 1 , b , strctx ) ;
if ( k < 0 ) {
error ( " text translation failed " ) ;
k = 0 ;
}
ensure_memory_list_available ( & low_strings_memlist , low_strings_top + k ) ;
memcpy ( low_strings + low_strings_top , translated_text , k ) ;
j = low_strings_top ;
low_strings_top + = k ;
2019-02-05 02:44:07 +02:00
return ( 0x21 + ( j / 2 ) ) ;
}
if ( glulx_mode & & done_compression )
compiler_error ( " Tried to add a string after compression was done. " ) ;
2022-03-03 02:10:25 +02:00
i = translate_text ( - 1 , b , strctx ) ;
if ( i < 0 ) {
error ( " text translation failed " ) ;
i = 0 ;
}
2019-02-05 02:44:07 +02:00
/* Insert null bytes as needed to ensure that the next static string */
/* also occurs at an address expressible as a packed address */
if ( ! glulx_mode ) {
int textalign ;
if ( oddeven_packing_switch )
textalign = scale_factor * 2 ;
else
textalign = scale_factor ;
while ( ( i % textalign ) ! = 0 )
{
2022-03-03 02:10:25 +02:00
ensure_memory_list_available ( & translated_text_memlist , i + 2 ) ;
translated_text [ i + + ] = 0 ;
translated_text [ i + + ] = 0 ;
2019-02-05 02:44:07 +02:00
}
}
j = static_strings_extent ;
2022-07-24 13:10:45 +03:00
ensure_memory_list_available ( & static_strings_area_memlist , static_strings_extent + i ) ;
for ( c = translated_text ; c < translated_text + i ;
c + + , static_strings_extent + + )
static_strings_area [ static_strings_extent ] = * c ;
2019-02-05 02:44:07 +02:00
if ( ! glulx_mode ) {
return ( j / scale_factor ) ;
}
else {
/* The marker value is a one-based string number. (We reserve zero
to mean " not a string at all " . */
return ( + + no_strings ) ;
}
}
/* ------------------------------------------------------------------------- */
/* Output a single Z-character into the buffer, and flush it if full */
/* ------------------------------------------------------------------------- */
static void write_z_char_z ( int i )
{ uint32 j ;
ASSERT_ZCODE ( ) ;
total_zchars_trans + + ;
zchars_out_buffer [ zob_index + + ] = ( i % 32 ) ;
if ( zob_index ! = 3 ) return ;
zob_index = 0 ;
j = zchars_out_buffer [ 0 ] * 0x0400 + zchars_out_buffer [ 1 ] * 0x0020
+ zchars_out_buffer [ 2 ] ;
2022-03-03 02:10:25 +02:00
if ( text_out_limit > = 0 ) {
if ( text_out_pos + 2 > text_out_limit ) {
text_out_overflow = TRUE ;
return ;
}
2019-02-05 02:44:07 +02:00
}
2022-03-03 02:10:25 +02:00
else {
ensure_memory_list_available ( & translated_text_memlist , text_out_pos + 2 ) ;
}
translated_text [ text_out_pos + + ] = j / 256 ; translated_text [ text_out_pos + + ] = j % 256 ;
2019-02-05 02:44:07 +02:00
total_bytes_trans + = 2 ;
}
/* Write the ZSCII character zsc as Z-characters.

   A space is written as Z-char 0. A character with an entry in
   zscii_to_alphabet_grid[] is written as its alphabet position, preceded
   by a shift (Z-char 4 or 5) if it is not in the first alphabet, and the
   position is marked as used in alphabet_used[]. Anything else is written
   as the four Z-char sequence 5, 6, zsc/32, zsc%32.

   The grid is only consulted for values in the range 0 to 0xFF. The lower
   bound matters: zsc can be negative, since translate_text() passes on
   the result of atoi() for an "@@" escape, and atoi() accepts a leading
   minus sign. Without the check such a value would index the table out
   of bounds.                                                              */
static void write_zscii(int zsc)
{
    int lookup_value, in_alphabet;

    if (zsc == ' ')
    {   write_z_char_z(0);
        return;
    }

    if (zsc >= 0 && zsc < 0x100) lookup_value = zscii_to_alphabet_grid[zsc];
    else lookup_value = -1;

    if (lookup_value >= 0)
    {   alphabet_used[lookup_value] = 'Y';
        in_alphabet = lookup_value/26;
        if (in_alphabet == 1) write_z_char_z(4);  /* SHIFT to A1 */
        if (in_alphabet == 2) write_z_char_z(5);  /* SHIFT to A2 */
        write_z_char_z(lookup_value%26 + 6);
    }
    else
    {   /* Not in any alphabet: use the escape sequence. */
        write_z_char_z(5); write_z_char_z(6);
        write_z_char_z(zsc/32); write_z_char_z(zsc%32);
    }
}
/* ------------------------------------------------------------------------- */
/* Finish a Z-coded string, padding out with Z-char 5s if necessary and */
/* setting the "end" bit on the final 2-byte word */
/* ------------------------------------------------------------------------- */
static void end_z_chars ( void )
2022-03-03 02:10:25 +02:00
{
2019-02-05 02:44:07 +02:00
zchars_trans_in_last_string = total_zchars_trans - zchars_trans_in_last_string ;
while ( zob_index ! = 0 ) write_z_char_z ( 5 ) ;
2022-03-03 02:10:25 +02:00
if ( text_out_pos < 2 ) {
/* Something went wrong. */
text_out_overflow = TRUE ;
return ;
}
translated_text [ text_out_pos - 2 ] + = 128 ;
2019-02-05 02:44:07 +02:00
}
/* Glulx handles this much more simply -- compression is done elsewhere.
   The value i is stored as a single byte of translated_text at
   text_out_pos. When text_out_limit is non-negative, a byte which would
   not fit below the limit is dropped and text_out_overflow is set; when
   it is negative, translated_text is grown as required.                   */
static void write_z_char_g(int i)
{
    ASSERT_GLULX();

    if (text_out_limit < 0) {
        ensure_memory_list_available(&translated_text_memlist, text_out_pos+1);
    }
    else if (text_out_pos+1 > text_out_limit) {
        text_out_overflow = TRUE;
        return;
    }

    translated_text[text_out_pos] = i;
    text_out_pos++;

    total_zchars_trans++;
    total_bytes_trans++;
}
/* Helper routine to compute the weight, in units, of a character handled
   by the Z-Machine. The result is:
     1 for a space, or a character in the first 26 positions of
       iso_to_alphabet_grid[];
     2 for a character at any later position in the grid;
     4 for a character with a negative grid entry.
   c is used directly as an index into iso_to_alphabet_grid[].             */
static int zchar_weight(int c)
{
    int grid_pos;

    if (c == ' ') return 1;

    grid_pos = iso_to_alphabet_grid[c];
    if (grid_pos < 0) return 4;

    return (grid_pos < 26) ? 1 : 2;
}
/* ------------------------------------------------------------------------- */
/* The main routine "text.c" provides to the rest of Inform: the text */
2022-03-03 02:10:25 +02:00
/* translator. s_text is the source text and the return value is the */
/* number of bytes translated. */
/* The translated text will be stored in translated_text. */
/* */
/* If p_limit is >= 0, the text length will not exceed that many bytes. */
/* If the translation tries to overflow this boundary, the return value */
/* will be -1. (You should display an error and not read translated_text.) */
/* */
/* If p_limit is negative, any amount of text is accepted (up to int32 */
/* anyway). */
/* */
2019-02-05 02:44:07 +02:00
/* Note that the source text may be corrupted by this routine. */
/* ------------------------------------------------------------------------- */
2022-03-03 02:10:25 +02:00
extern int32 translate_text ( int32 p_limit , char * s_text , int strctx )
{ int i , j , k , in_alphabet , lookup_value , is_abbreviation ;
2019-02-05 02:44:07 +02:00
int32 unicode ; int zscii ;
unsigned char * text_in ;
2022-03-03 02:10:25 +02:00
if ( p_limit > = 0 ) {
ensure_memory_list_available ( & translated_text_memlist , p_limit ) ;
}
/* For STRCTX_ABBREV, the string being translated is itself an
abbreviation string , so it can ' t make use of abbreviations . Set
the is_abbreviation flag to indicate this .
The compiler has historically set this flag for the Lowstring
directive as well - - the in_low_memory and is_abbreviation flag were
always the same . I am preserving that convention . */
is_abbreviation = ( strctx = = STRCTX_ABBREV | | strctx = = STRCTX_LOWSTRING ) ;
/* Cast the input and output streams to unsigned char: text_out_pos will
2019-02-05 02:44:07 +02:00
advance as bytes of Z - coded text are written , but text_in doesn ' t */
text_in = ( unsigned char * ) s_text ;
2022-03-03 02:10:25 +02:00
text_out_pos = 0 ;
text_out_limit = p_limit ;
2019-02-05 02:44:07 +02:00
text_out_overflow = FALSE ;
/* Remember the Z-chars total so that later we can subtract to find the
number of Z - chars translated on this string */
zchars_trans_in_last_string = total_zchars_trans ;
/* Start with the Z-characters output buffer empty */
zob_index = 0 ;
/* If this is the first text translated since the abbreviations were
declared , and if some were declared , then it ' s time to make the
lookup table for abbreviations
( Except : we don ' t if the text being translated is itself
the text of an abbreviation currently being defined ) */
if ( ( ! abbrevs_lookup_table_made ) & & ( no_abbreviations > 0 )
& & ( ! is_abbreviation ) )
make_abbrevs_lookup ( ) ;
2022-03-03 02:10:25 +02:00
/* If we're storing the whole game text to memory, then add this text.
We will put two newlines between each text and four at the very end .
( The optimise code does a lot of sloppy text [ i + 2 ] , so the extra
two newlines past all_text_top are necessary . ) */
2019-02-05 02:44:07 +02:00
if ( ( ! is_abbreviation ) & & ( store_the_text ) )
2022-03-03 02:10:25 +02:00
{ int addlen = strlen ( s_text ) ;
ensure_memory_list_available ( & all_text_memlist , all_text_top + addlen + 5 ) ;
sprintf ( all_text + all_text_top , " %s \n \n \n \n " , s_text ) ;
/* Advance past two newlines. */
all_text_top + = ( addlen + 2 ) ;
2019-02-05 02:44:07 +02:00
}
2022-03-03 02:10:25 +02:00
if ( transcript_switch ) {
/* Omit veneer strings, unless we're using the new transcript format, which includes everything. */
if ( ( ! veneer_mode ) | | TRANSCRIPT_FORMAT = = 1 ) {
int label = strctx ;
if ( veneer_mode ) {
if ( label = = STRCTX_GAME )
label = STRCTX_VENEER ;
else if ( label = = STRCTX_GAMEOPC )
label = STRCTX_VENEEROPC ;
}
write_to_transcript_file ( s_text , label ) ;
}
}
/* Computing the optimal way to parse strings to insert abbreviations with dynamic programming */
/* (ref: R.A. Wagner , "Common phrases and minimum-space text storage", Commun. ACM, 16 (3) (1973)) */
/* We compute this optimal way here; it's stored in abbreviations_optimal_parse_schedule */
if ( economy_switch )
{
uchar * q , c ;
int l , min_score , from ;
int text_in_length ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
text_in_length = strlen ( ( char * ) text_in ) ;
ensure_memory_list_available ( & abbreviations_optimal_parse_schedule_memlist , text_in_length ) ;
ensure_memory_list_available ( & abbreviations_optimal_parse_scores_memlist , text_in_length + 1 ) ;
abbreviations_optimal_parse_scores [ text_in_length ] = 0 ;
for ( j = text_in_length - 1 ; j > = 0 ; j - - )
{ /* Initial values: empty schedule, score = just write the letter without abbreviating. */
abbreviations_optimal_parse_schedule [ j ] = - 1 ;
min_score = zchar_weight ( text_in [ j ] ) + abbreviations_optimal_parse_scores [ j + 1 ] ;
/* If there's an abbreviation starting with that letter... */
if ( ( from = abbrevs_lookup [ text_in [ j ] ] ) ! = - 1 )
{
c = text_in [ j ] ;
/* Loop on all abbreviations starting with what is in c. */
2024-02-12 01:08:33 +02:00
for ( k = from ;
k < no_abbreviations ;
k + + )
{
q = ( uchar * ) abbreviations_text + abbreviations [ k ] . textpos ;
if ( c ! = q [ 0 ] ) break ;
2022-03-03 02:10:25 +02:00
/* Let's compare; we also keep track of the length of the abbreviation. */
for ( l = 1 ; q [ l ] ! = 0 ; l + + )
{ if ( text_in [ j + l ] ! = q [ l ] ) { goto NotMatched ; }
}
/* We have a match (length l), but is it smaller in size? */
if ( min_score > 2 + abbreviations_optimal_parse_scores [ j + l ] )
{ /* It is indeed smaller, so let's write it down in our schedule. */
min_score = 2 + abbreviations_optimal_parse_scores [ j + l ] ;
abbreviations_optimal_parse_schedule [ j ] = k ;
}
NotMatched : ;
}
}
/* We gave it our best, this is the smallest we got. */
abbreviations_optimal_parse_scores [ j ] = min_score ;
}
}
2019-02-05 02:44:07 +02:00
if ( ! glulx_mode ) {
/* The empty string of Z-text is illegal, since it can't carry an end
bit : so we translate an empty string of ASCII text to just the
pad character 5. Printing this causes nothing to appear on screen . */
if ( text_in [ 0 ] = = 0 ) write_z_char_z ( 5 ) ;
/* Loop through the characters of the null-terminated input text: note
that if 1 is written over a character in the input text , it is
afterwards ignored */
for ( i = 0 ; text_in [ i ] ! = 0 ; i + + )
{ total_chars_trans + + ;
/* Contract ". " into ". " if double-space-removing switch set:
likewise " ? " and " ! " if the setting is high enough */
if ( ( double_space_setting > = 1 )
& & ( text_in [ i + 1 ] = = ' ' ) & & ( text_in [ i + 2 ] = = ' ' ) )
{ if ( text_in [ i ] = = ' . ' ) text_in [ i + 2 ] = 1 ;
if ( double_space_setting > = 2 )
{ if ( text_in [ i ] = = ' ? ' ) text_in [ i + 2 ] = 1 ;
if ( text_in [ i ] = = ' ! ' ) text_in [ i + 2 ] = 1 ;
}
}
2022-03-03 02:10:25 +02:00
/* Try abbreviations if the economy switch set. */
/* Look at the abbreviation schedule to see if we should abbreviate here. */
/* Note: Just because the schedule has something doesn't mean we should abbreviate there; */
/* sometimes you abbreviate before because it's better. If we have already replaced the */
/* char by a '1', it means we're in the middle of an abbreviation; don't try to abbreviate then. */
if ( ( economy_switch ) & & ( ! is_abbreviation ) & & text_in [ i ] ! = 1 & &
( ( j = abbreviations_optimal_parse_schedule [ i ] ) ! = - 1 ) )
{
/* Fill with 1s, which will get ignored by everyone else. */
2024-02-12 01:08:33 +02:00
uchar * p = ( uchar * ) abbreviation_text ( j ) ;
2022-03-03 02:10:25 +02:00
for ( k = 0 ; p [ k ] ! = 0 ; k + + ) text_in [ i + k ] = 1 ;
/* Actually write the abbreviation in the story file. */
abbreviations [ j ] . freq + + ;
/* Abbreviations run from MAX_DYNAMIC_STRINGS to 96. */
j + = MAX_DYNAMIC_STRINGS ;
write_z_char_z ( j / 32 + 1 ) ; write_z_char_z ( j % 32 ) ;
2019-02-05 02:44:07 +02:00
}
2022-03-03 02:10:25 +02:00
2019-02-05 02:44:07 +02:00
/* If Unicode switch set, use text_to_unicode to perform UTF-8
decoding */
if ( character_set_unicode & & ( text_in [ i ] & 0x80 ) )
{ unicode = text_to_unicode ( ( char * ) ( text_in + i ) ) ;
zscii = unicode_to_zscii ( unicode ) ;
if ( zscii ! = 5 ) write_zscii ( zscii ) ;
else
{ unicode_char_error (
" Character can only be used if declared in \
advance as part of ' Zcharacter table ' : " , unicode);
}
i + = textual_form_length - 1 ;
continue ;
}
/* '@' is the escape character in Inform string notation: the various
possibilities are :
@ @ decimalnumber : write this ZSCII char ( 0 to 1023 )
2022-07-24 13:10:45 +03:00
@ twodigits or : write the abbreviation string with this
@ ( digits ) decimal number
@ ( symbol ) : write the abbreviation string with this
( constant ) value
2019-02-05 02:44:07 +02:00
@ accentcode : this accented character : e . g . ,
for @ ' e write an E - acute
@ { . . . } : this Unicode char ( in hex ) */
if ( text_in [ i ] = = ' @ ' )
{ if ( text_in [ i + 1 ] = = ' @ ' )
{
2022-07-24 13:10:45 +03:00
/* @@... (ascii value) */
2019-02-05 02:44:07 +02:00
i + = 2 ; j = atoi ( ( char * ) ( text_in + i ) ) ;
switch ( j )
{ /* Prevent ~ and ^ from being translated to double-quote
and new - line , as they ordinarily would be */
case 94 : write_z_char_z ( 5 ) ; write_z_char_z ( 6 ) ;
write_z_char_z ( 94 / 32 ) ; write_z_char_z ( 94 % 32 ) ;
break ;
case 126 : write_z_char_z ( 5 ) ; write_z_char_z ( 6 ) ;
write_z_char_z ( 126 / 32 ) ; write_z_char_z ( 126 % 32 ) ;
break ;
default : write_zscii ( j ) ; break ;
}
while ( isdigit ( text_in [ i ] ) ) i + + ; i - - ;
}
2022-07-24 13:10:45 +03:00
else if ( text_in [ i + 1 ] = = ' ( ' )
{
/* @(...) (dynamic string) */
int len = 0 , digits = 0 ;
i + = 2 ;
/* This accepts "12xyz" as a symbol, which it really isn't,
but that just means it won ' t be found . */
2024-02-12 01:08:33 +02:00
while ( ( text_in [ i ] = = ' _ ' | | isalnum ( text_in [ i ] ) ) ) {
2022-07-24 13:10:45 +03:00
char ch = text_in [ i + + ] ;
if ( isdigit ( ch ) ) digits + + ;
2024-02-12 01:08:33 +02:00
ensure_memory_list_available ( & temp_symbol_memlist , len + 1 ) ;
temp_symbol [ len + + ] = ch ;
2022-07-24 13:10:45 +03:00
}
2024-02-12 01:08:33 +02:00
ensure_memory_list_available ( & temp_symbol_memlist , len + 1 ) ;
temp_symbol [ len ] = ' \0 ' ;
2022-07-24 13:10:45 +03:00
j = - 1 ;
2024-02-12 01:08:33 +02:00
/* We would like to parse temp_symbol as *either* a decimal
2022-07-24 13:10:45 +03:00
number or a constant symbol . */
if ( text_in [ i ] ! = ' ) ' | | len = = 0 ) {
error ( " '@(...)' abbreviation must contain a symbol " ) ;
}
else if ( digits = = len ) {
/* all digits; parse as decimal */
2024-02-12 01:08:33 +02:00
j = atoi ( temp_symbol ) ;
2022-07-24 13:10:45 +03:00
}
else {
2024-02-12 01:08:33 +02:00
int sym = get_symbol_index ( temp_symbol ) ;
if ( sym < 0 | | ( symbols [ sym ] . flags & UNKNOWN_SFLAG ) | | symbols [ sym ] . type ! = CONSTANT_T | | symbols [ sym ] . marker ) {
error_named ( " '@(...)' abbreviation expected a known constant value, but contained " , temp_symbol ) ;
2022-07-24 13:10:45 +03:00
}
else {
symbols [ sym ] . flags | = USED_SFLAG ;
j = symbols [ sym ] . value ;
}
}
if ( ! glulx_mode & & j > = 96 ) {
error_max_dynamic_strings ( j ) ;
j = - 1 ;
}
if ( j > = MAX_DYNAMIC_STRINGS ) {
error_max_dynamic_strings ( j ) ;
j = - 1 ;
}
if ( j > = 0 ) {
write_z_char_z ( j / 32 + 1 ) ; write_z_char_z ( j % 32 ) ;
}
else {
write_z_char_z ( ' ' ) ; /* error fallback */
}
}
2019-02-05 02:44:07 +02:00
else if ( isdigit ( text_in [ i + 1 ] ) ! = 0 )
{ int d1 , d2 ;
2022-03-03 02:10:25 +02:00
/* @.. (dynamic string) */
2019-02-05 02:44:07 +02:00
d1 = character_digit_value [ text_in [ i + 1 ] ] ;
d2 = character_digit_value [ text_in [ i + 2 ] ] ;
if ( ( d1 = = 127 ) | | ( d1 > = 10 ) | | ( d2 = = 127 ) | | ( d2 > = 10 ) )
error ( " '@..' must have two decimal digits " ) ;
else
2022-03-03 02:10:25 +02:00
{
j = d1 * 10 + d2 ;
if ( ! glulx_mode & & j > = 96 ) {
error_max_dynamic_strings ( j ) ;
2022-07-24 13:10:45 +03:00
j = - 1 ;
2022-03-03 02:10:25 +02:00
}
if ( j > = MAX_DYNAMIC_STRINGS ) {
/* Shouldn't get here with two digits */
error_max_dynamic_strings ( j ) ;
2022-07-24 13:10:45 +03:00
j = - 1 ;
2022-03-03 02:10:25 +02:00
}
i + = 2 ;
2022-07-24 13:10:45 +03:00
if ( j > = 0 ) {
write_z_char_z ( j / 32 + 1 ) ; write_z_char_z ( j % 32 ) ;
}
else {
write_z_char_z ( ' ' ) ; /* error fallback */
}
2019-02-05 02:44:07 +02:00
}
}
else
{
/* A string escape specifying an unusual character */
unicode = text_to_unicode ( ( char * ) ( text_in + i ) ) ;
zscii = unicode_to_zscii ( unicode ) ;
if ( zscii ! = 5 ) write_zscii ( zscii ) ;
else
{ unicode_char_error (
" Character can only be used if declared in \
advance as part of ' Zcharacter table ' : " , unicode);
}
i + = textual_form_length - 1 ;
}
}
else
{ /* Skip a character which has been over-written with the null
value 1 earlier on */
if ( text_in [ i ] ! = 1 )
{ if ( text_in [ i ] = = ' ' ) write_z_char_z ( 0 ) ;
else
{ j = ( int ) text_in [ i ] ;
lookup_value = iso_to_alphabet_grid [ j ] ;
if ( lookup_value < 0 )
{ /* The character isn't in the standard alphabets, so
we have to use the ZSCII 4 - Z - char sequence */
if ( lookup_value = = - 5 )
{ /* Character isn't in the ZSCII set at all */
unicode = iso_to_unicode ( j ) ;
unicode_char_error (
" Character can only be used if declared in \
advance as part of ' Zcharacter table ' : " , unicode);
write_zscii ( 0x200 + unicode / 0x100 ) ;
write_zscii ( 0x300 + unicode % 0x100 ) ;
}
else write_zscii ( - lookup_value ) ;
}
else
{ /* The character is in one of the standard alphabets:
write a SHIFT to temporarily change alphabet if
it isn ' t in alphabet 0 , then write the Z - char */
alphabet_used [ lookup_value ] = ' Y ' ;
in_alphabet = lookup_value / 26 ;
if ( in_alphabet = = 1 ) write_z_char_z ( 4 ) ; /* SHIFT to A1 */
if ( in_alphabet = = 2 ) write_z_char_z ( 5 ) ; /* SHIFT to A2 */
write_z_char_z ( lookup_value % 26 + 6 ) ;
}
}
}
}
}
/* Flush the Z-characters output buffer and set the "end" bit */
end_z_chars ( ) ;
}
else {
/* The text storage here is, of course, temporary. Compression
will occur when we ' re finished compiling , so that all the
clever Huffman stuff will work .
In the stored text , we use " @@ " to indicate @ ,
" @0 " to indicate a zero byte ,
" @ANNNN " to indicate an abbreviation ,
" @DNNNN " to indicate a dynamic string thing .
" @UNNNN " to indicate a four - byte Unicode value ( 0x100 or higher ) .
( NNNN is a four - digit hex number using the letters A - P . . . an
ugly representation but a convenient one . )
*/
for ( i = 0 ; text_in [ i ] ! = 0 ; i + + ) {
/* Contract ". " into ". " if double-space-removing switch set:
likewise " ? " and " ! " if the setting is high enough . */
if ( ( double_space_setting > = 1 )
& & ( text_in [ i + 1 ] = = ' ' ) & & ( text_in [ i + 2 ] = = ' ' ) ) {
if ( text_in [ i ] = = ' . '
| | ( double_space_setting > = 2
& & ( text_in [ i ] = = ' ? ' | | text_in [ i ] = = ' ! ' ) ) ) {
text_in [ i + 1 ] = text_in [ i ] ;
i + + ;
}
}
total_chars_trans + + ;
/* Try abbreviations if the economy switch set. We have to be in
compression mode too , since the abbreviation mechanism is part
of string decompression . */
if ( ( economy_switch ) & & ( compression_switch ) & & ( ! is_abbreviation )
& & ( ( k = abbrevs_lookup [ text_in [ i ] ] ) ! = - 1 )
& & ( ( j = try_abbreviations_from ( text_in , i , k ) ) ! = - 1 ) ) {
2024-02-12 01:08:33 +02:00
char * cx = abbreviation_text ( j ) ;
2019-02-05 02:44:07 +02:00
i + = ( strlen ( cx ) - 1 ) ;
write_z_char_g ( ' @ ' ) ;
write_z_char_g ( ' A ' ) ;
write_z_char_g ( ' A ' + ( ( j > > 12 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 8 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 4 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j ) & 0x0F ) ) ;
}
else if ( text_in [ i ] = = ' @ ' ) {
if ( text_in [ i + 1 ] = = ' @ ' ) {
/* An ASCII code */
i + = 2 ; j = atoi ( ( char * ) ( text_in + i ) ) ;
if ( j = = ' @ ' | | j = = ' \0 ' ) {
write_z_char_g ( ' @ ' ) ;
if ( j = = 0 ) {
j = ' 0 ' ;
if ( ! compression_switch )
warning ( " Ascii @@0 will prematurely terminate non-compressed \
string . " );
}
}
write_z_char_g ( j ) ;
while ( isdigit ( text_in [ i ] ) ) i + + ; i - - ;
}
2022-07-24 13:10:45 +03:00
else if ( text_in [ i + 1 ] = = ' ( ' ) {
int len = 0 , digits = 0 ;
i + = 2 ;
/* This accepts "12xyz" as a symbol, which it really isn't,
but that just means it won ' t be found . */
2024-02-12 01:08:33 +02:00
while ( ( text_in [ i ] = = ' _ ' | | isalnum ( text_in [ i ] ) ) ) {
2022-07-24 13:10:45 +03:00
char ch = text_in [ i + + ] ;
if ( isdigit ( ch ) ) digits + + ;
2024-02-12 01:08:33 +02:00
ensure_memory_list_available ( & temp_symbol_memlist , len + 1 ) ;
temp_symbol [ len + + ] = ch ;
2022-07-24 13:10:45 +03:00
}
2024-02-12 01:08:33 +02:00
ensure_memory_list_available ( & temp_symbol_memlist , len + 1 ) ;
temp_symbol [ len ] = ' \0 ' ;
2022-07-24 13:10:45 +03:00
j = - 1 ;
2024-02-12 01:08:33 +02:00
/* We would like to parse temp_symbol as *either* a decimal
2022-07-24 13:10:45 +03:00
number or a constant symbol . */
if ( text_in [ i ] ! = ' ) ' | | len = = 0 ) {
error ( " '@(...)' abbreviation must contain a symbol " ) ;
}
else if ( digits = = len ) {
/* all digits; parse as decimal */
2024-02-12 01:08:33 +02:00
j = atoi ( temp_symbol ) ;
2022-07-24 13:10:45 +03:00
}
else {
2024-02-12 01:08:33 +02:00
int sym = get_symbol_index ( temp_symbol ) ;
if ( sym < 0 | | ( symbols [ sym ] . flags & UNKNOWN_SFLAG ) | | symbols [ sym ] . type ! = CONSTANT_T | | symbols [ sym ] . marker ) {
error_named ( " '@(...)' abbreviation expected a known constant value, but contained " , temp_symbol ) ;
2022-07-24 13:10:45 +03:00
}
else {
symbols [ sym ] . flags | = USED_SFLAG ;
j = symbols [ sym ] . value ;
}
}
if ( j > = MAX_DYNAMIC_STRINGS ) {
error_max_dynamic_strings ( j ) ;
j = - 1 ;
}
if ( j + 1 > = no_dynamic_strings )
no_dynamic_strings = j + 1 ;
if ( j > = 0 ) {
write_z_char_g ( ' @ ' ) ;
write_z_char_g ( ' D ' ) ;
write_z_char_g ( ' A ' + ( ( j > > 12 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 8 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 4 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j ) & 0x0F ) ) ;
}
else {
write_z_char_g ( ' ' ) ; /* error fallback */
}
}
2019-02-05 02:44:07 +02:00
else if ( isdigit ( text_in [ i + 1 ] ) ) {
int d1 , d2 ;
d1 = character_digit_value [ text_in [ i + 1 ] ] ;
d2 = character_digit_value [ text_in [ i + 2 ] ] ;
if ( ( d1 = = 127 ) | | ( d1 > = 10 ) | | ( d2 = = 127 ) | | ( d2 > = 10 ) ) {
error ( " '@..' must have two decimal digits " ) ;
}
else {
if ( ! compression_switch )
warning ( " '@..' print variable will not work in non-compressed \
string ; substituting ' ' . " );
i + = 2 ;
j = d1 * 10 + d2 ;
if ( j > = MAX_DYNAMIC_STRINGS ) {
2022-03-03 02:10:25 +02:00
error_max_dynamic_strings ( j ) ;
2022-07-24 13:10:45 +03:00
j = - 1 ;
2019-02-05 02:44:07 +02:00
}
if ( j + 1 > = no_dynamic_strings )
no_dynamic_strings = j + 1 ;
2022-07-24 13:10:45 +03:00
if ( j > = 0 ) {
write_z_char_g ( ' @ ' ) ;
write_z_char_g ( ' D ' ) ;
write_z_char_g ( ' A ' + ( ( j > > 12 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 8 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 4 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j ) & 0x0F ) ) ;
}
else {
write_z_char_g ( ' ' ) ; /* error fallback */
}
2019-02-05 02:44:07 +02:00
}
}
else {
unicode = text_to_unicode ( ( char * ) ( text_in + i ) ) ;
i + = textual_form_length - 1 ;
2020-06-01 20:29:12 +03:00
if ( unicode = = ' @ ' | | unicode = = ' \0 ' ) {
write_z_char_g ( ' @ ' ) ;
write_z_char_g ( unicode ? ' @ ' : ' 0 ' ) ;
}
else if ( unicode > = 0 & & unicode < 256 ) {
2019-02-05 02:44:07 +02:00
write_z_char_g ( unicode ) ;
}
else {
if ( ! compression_switch ) {
warning ( " Unicode characters will not work in non-compressed \
string ; substituting ' ? ' . " );
write_z_char_g ( ' ? ' ) ;
}
else {
j = unicode_entity_index ( unicode ) ;
write_z_char_g ( ' @ ' ) ;
write_z_char_g ( ' U ' ) ;
write_z_char_g ( ' A ' + ( ( j > > 12 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 8 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 4 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j ) & 0x0F ) ) ;
}
}
}
}
else if ( text_in [ i ] = = ' ^ ' )
write_z_char_g ( 0x0A ) ;
else if ( text_in [ i ] = = ' ~ ' )
write_z_char_g ( ' " ' ) ;
else if ( character_set_unicode ) {
if ( text_in [ i ] & 0x80 ) {
unicode = text_to_unicode ( ( char * ) ( text_in + i ) ) ;
i + = textual_form_length - 1 ;
if ( unicode > = 0 & & unicode < 256 ) {
write_z_char_g ( unicode ) ;
}
else {
if ( ! compression_switch ) {
warning ( " Unicode characters will not work in non-compressed \
string ; substituting ' ? ' . " );
write_z_char_g ( ' ? ' ) ;
}
else {
j = unicode_entity_index ( unicode ) ;
write_z_char_g ( ' @ ' ) ;
write_z_char_g ( ' U ' ) ;
write_z_char_g ( ' A ' + ( ( j > > 12 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 8 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 4 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j ) & 0x0F ) ) ;
}
}
}
else {
write_z_char_g ( text_in [ i ] ) ;
}
}
else {
unicode = iso_to_unicode_grid [ text_in [ i ] ] ;
if ( unicode > = 0 & & unicode < 256 ) {
write_z_char_g ( unicode ) ;
}
else {
if ( ! compression_switch ) {
warning ( " Unicode characters will not work in non-compressed \
string ; substituting ' ? ' . " );
write_z_char_g ( ' ? ' ) ;
}
else {
j = unicode_entity_index ( unicode ) ;
write_z_char_g ( ' @ ' ) ;
write_z_char_g ( ' U ' ) ;
write_z_char_g ( ' A ' + ( ( j > > 12 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 8 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j > > 4 ) & 0x0F ) ) ;
write_z_char_g ( ' A ' + ( ( j ) & 0x0F ) ) ;
}
}
}
}
write_z_char_g ( 0 ) ;
2022-07-24 13:10:45 +03:00
zchars_trans_in_last_string = total_zchars_trans - zchars_trans_in_last_string ;
2019-02-05 02:44:07 +02:00
}
if ( text_out_overflow )
2022-03-03 02:10:25 +02:00
return - 1 ;
2019-02-05 02:44:07 +02:00
else
2022-03-03 02:10:25 +02:00
return text_out_pos ;
2019-02-05 02:44:07 +02:00
}
/* Return the index in unicode_usage_entries of the given Unicode value,
   appending a new entry (and linking it at the head of its hash bucket)
   if the value has not been recorded yet. The returned index is what
   translate_text() encodes in an "@UNNNN" escape. */
static int unicode_entity_index(int32 unicode)
{
    int j;
    int buck = unicode % UNICODE_HASH_BUCKETS;

    /* In C the remainder takes the sign of the dividend, so a negative
       value would otherwise select a bucket before the start of
       unicode_usage_hash. Fold it back into range. */
    if (buck < 0)
        buck += UNICODE_HASH_BUCKETS;

    /* Walk the bucket's chain; a negative link marks the end. */
    for (j = unicode_usage_hash[buck]; j >= 0; j = unicode_usage_entries[j].next) {
        if (unicode_usage_entries[j].ch == unicode)
            break;
    }

    if (j < 0) {
        /* Not seen before: allocate the next entry and push it onto
           the front of the chain. */
        ensure_memory_list_available(&unicode_usage_entries_memlist, no_unicode_chars+1);
        j = no_unicode_chars++;
        unicode_usage_entries[j].ch = unicode;
        unicode_usage_entries[j].next = unicode_usage_hash[buck];
        unicode_usage_hash[buck] = j;
    }

    return j;
}
/* ------------------------------------------------------------------------- */
/* Glulx compression code */
/* ------------------------------------------------------------------------- */
static void compress_makebits ( int entnum , int depth , int prevbit ,
huffbitlist_t * bits ) ;
/* The compressor. This uses the usual Huffman compression algorithm. */
void compress_game_text ( )
{
int entities = 0 , branchstart , branches ;
int numlive ;
int32 lx ;
int jx ;
int ch ;
int32 ix ;
2022-03-03 02:10:25 +02:00
int max_char_set ;
2019-02-05 02:44:07 +02:00
huffbitlist_t bits ;
if ( compression_switch ) {
2022-03-03 02:10:25 +02:00
max_char_set = 257 + no_abbreviations + no_dynamic_strings + no_unicode_chars ;
huff_entities = my_calloc ( sizeof ( huffentity_t ) , max_char_set * 2 + 1 ,
" huffman entities " ) ;
hufflist = my_calloc ( sizeof ( huffentity_t * ) , max_char_set ,
" huffman node list " ) ;
2019-02-05 02:44:07 +02:00
/* How many entities have we currently got? Well, 256 plus the
string - terminator plus Unicode chars plus abbrevations plus
dynamic strings . */
entities = 256 + 1 ;
huff_unicode_start = entities ;
entities + = no_unicode_chars ;
huff_abbrev_start = entities ;
if ( economy_switch )
entities + = no_abbreviations ;
huff_dynam_start = entities ;
entities + = no_dynamic_strings ;
2022-03-03 02:10:25 +02:00
if ( entities > max_char_set )
compiler_error ( " Too many entities for max_char_set " ) ;
2019-02-05 02:44:07 +02:00
/* Characters */
for ( jx = 0 ; jx < 256 ; jx + + ) {
huff_entities [ jx ] . type = 2 ;
huff_entities [ jx ] . count = 0 ;
huff_entities [ jx ] . u . ch = jx ;
}
/* Terminator */
huff_entities [ 256 ] . type = 1 ;
huff_entities [ 256 ] . count = 0 ;
for ( jx = 0 ; jx < no_unicode_chars ; jx + + ) {
huff_entities [ huff_unicode_start + jx ] . type = 4 ;
huff_entities [ huff_unicode_start + jx ] . count = 0 ;
huff_entities [ huff_unicode_start + jx ] . u . val = jx ;
}
if ( economy_switch ) {
for ( jx = 0 ; jx < no_abbreviations ; jx + + ) {
huff_entities [ huff_abbrev_start + jx ] . type = 3 ;
huff_entities [ huff_abbrev_start + jx ] . count = 0 ;
huff_entities [ huff_abbrev_start + jx ] . u . val = jx ;
}
}
for ( jx = 0 ; jx < no_dynamic_strings ; jx + + ) {
huff_entities [ huff_dynam_start + jx ] . type = 9 ;
huff_entities [ huff_dynam_start + jx ] . count = 0 ;
huff_entities [ huff_dynam_start + jx ] . u . val = jx ;
}
}
else {
/* No compression; use defaults that will make it easy to check
for errors . */
no_huff_entities = 257 ;
huff_unicode_start = 257 ;
huff_abbrev_start = 257 ;
2022-03-03 02:10:25 +02:00
huff_dynam_start = 257 + no_abbreviations ;
2019-02-05 02:44:07 +02:00
compression_table_size = 0 ;
}
if ( compression_switch ) {
for ( lx = 0 , ix = 0 ; lx < no_strings ; lx + + ) {
int escapelen = 0 , escapetype = 0 ;
int done = FALSE ;
int32 escapeval = 0 ;
while ( ! done ) {
2022-07-24 13:10:45 +03:00
ch = static_strings_area [ ix ] ;
2019-02-05 02:44:07 +02:00
ix + + ;
if ( ix > static_strings_extent | | ch < 0 )
compiler_error ( " Read too much not-yet-compressed text. " ) ;
if ( escapelen = = - 1 ) {
escapelen = 0 ;
if ( ch = = ' @ ' ) {
ch = ' @ ' ;
}
else if ( ch = = ' 0 ' ) {
ch = ' \0 ' ;
}
else if ( ch = = ' A ' | | ch = = ' D ' | | ch = = ' U ' ) {
escapelen = 4 ;
escapetype = ch ;
escapeval = 0 ;
continue ;
}
else {
compiler_error ( " Strange @ escape in processed text. " ) ;
}
}
else if ( escapelen ) {
escapeval = ( escapeval < < 4 ) | ( ( ch - ' A ' ) & 0x0F ) ;
escapelen - - ;
if ( escapelen = = 0 ) {
if ( escapetype = = ' A ' ) {
ch = huff_abbrev_start + escapeval ;
}
else if ( escapetype = = ' D ' ) {
ch = huff_dynam_start + escapeval ;
}
else if ( escapetype = = ' U ' ) {
ch = huff_unicode_start + escapeval ;
}
else {
compiler_error ( " Strange @ escape in processed text. " ) ;
}
}
else
continue ;
}
else {
if ( ch = = ' @ ' ) {
escapelen = - 1 ;
continue ;
}
if ( ch = = 0 ) {
ch = 256 ;
done = TRUE ;
}
}
huff_entities [ ch ] . count + + ;
}
}
numlive = 0 ;
for ( jx = 0 ; jx < entities ; jx + + ) {
if ( huff_entities [ jx ] . count ) {
hufflist [ numlive ] = & ( huff_entities [ jx ] ) ;
numlive + + ;
}
}
branchstart = entities ;
branches = 0 ;
while ( numlive > 1 ) {
int best1 , best2 ;
int best1num , best2num ;
huffentity_t * bran ;
if ( hufflist [ 0 ] - > count < hufflist [ 1 ] - > count ) {
best1 = 0 ;
best2 = 1 ;
}
else {
best2 = 0 ;
best1 = 1 ;
}
best1num = hufflist [ best1 ] - > count ;
best2num = hufflist [ best2 ] - > count ;
for ( jx = 2 ; jx < numlive ; jx + + ) {
if ( hufflist [ jx ] - > count < best1num ) {
best2 = best1 ;
best2num = best1num ;
best1 = jx ;
best1num = hufflist [ best1 ] - > count ;
}
else if ( hufflist [ jx ] - > count < best2num ) {
best2 = jx ;
best2num = hufflist [ best2 ] - > count ;
}
}
bran = & ( huff_entities [ branchstart + branches ] ) ;
branches + + ;
bran - > type = 0 ;
bran - > count = hufflist [ best1 ] - > count + hufflist [ best2 ] - > count ;
bran - > u . branch [ 0 ] = ( hufflist [ best1 ] - huff_entities ) ;
bran - > u . branch [ 1 ] = ( hufflist [ best2 ] - huff_entities ) ;
hufflist [ best1 ] = bran ;
if ( best2 < numlive - 1 ) {
memmove ( & ( hufflist [ best2 ] ) , & ( hufflist [ best2 + 1 ] ) ,
( ( numlive - 1 ) - best2 ) * sizeof ( huffentity_t * ) ) ;
}
numlive - - ;
}
huff_entity_root = ( hufflist [ 0 ] - huff_entities ) ;
for ( ix = 0 ; ix < MAXHUFFBYTES ; ix + + )
bits . b [ ix ] = 0 ;
compression_table_size = 12 ;
no_huff_entities = 0 ; /* compress_makebits will total this up */
compress_makebits ( huff_entity_root , 0 , - 1 , & bits ) ;
}
/* Now, sadly, we have to compute the size of the string section,
without actually doing the compression . */
compression_string_size = 0 ;
2022-03-03 02:10:25 +02:00
ensure_memory_list_available ( & compressed_offsets_memlist , no_strings ) ;
2019-02-05 02:44:07 +02:00
for ( lx = 0 , ix = 0 ; lx < no_strings ; lx + + ) {
int escapelen = 0 , escapetype = 0 ;
int done = FALSE ;
int32 escapeval = 0 ;
jx = 0 ;
compressed_offsets [ lx ] = compression_table_size + compression_string_size ;
compression_string_size + + ; /* for the type byte */
while ( ! done ) {
2022-07-24 13:10:45 +03:00
ch = static_strings_area [ ix ] ;
2019-02-05 02:44:07 +02:00
ix + + ;
if ( ix > static_strings_extent | | ch < 0 )
compiler_error ( " Read too much not-yet-compressed text. " ) ;
if ( escapelen = = - 1 ) {
escapelen = 0 ;
if ( ch = = ' @ ' ) {
ch = ' @ ' ;
}
else if ( ch = = ' 0 ' ) {
ch = ' \0 ' ;
}
else if ( ch = = ' A ' | | ch = = ' D ' | | ch = = ' U ' ) {
escapelen = 4 ;
escapetype = ch ;
escapeval = 0 ;
continue ;
}
else {
compiler_error ( " Strange @ escape in processed text. " ) ;
}
}
else if ( escapelen ) {
escapeval = ( escapeval < < 4 ) | ( ( ch - ' A ' ) & 0x0F ) ;
escapelen - - ;
if ( escapelen = = 0 ) {
if ( escapetype = = ' A ' ) {
ch = huff_abbrev_start + escapeval ;
}
else if ( escapetype = = ' D ' ) {
ch = huff_dynam_start + escapeval ;
}
else if ( escapetype = = ' U ' ) {
ch = huff_unicode_start + escapeval ;
}
else {
compiler_error ( " Strange @ escape in processed text. " ) ;
}
}
else
continue ;
}
else {
if ( ch = = ' @ ' ) {
escapelen = - 1 ;
continue ;
}
if ( ch = = 0 ) {
ch = 256 ;
done = TRUE ;
}
}
if ( compression_switch ) {
jx + = huff_entities [ ch ] . depth ;
compression_string_size + = ( jx / 8 ) ;
jx = ( jx % 8 ) ;
}
else {
if ( ch > = huff_dynam_start ) {
compression_string_size + = 3 ;
}
else if ( ch > = huff_unicode_start ) {
compiler_error ( " Abbreviation/Unicode in non-compressed string \
should be impossible . " );
}
else
compression_string_size + = 1 ;
}
}
if ( compression_switch & & jx )
compression_string_size + + ;
}
done_compression = TRUE ;
}
/* Recursively walk the Huffman tree from entity entnum, assigning to each
   node its address in the decoding table, its depth, and the bit string
   which leads to it, and adding the node's table size to
   compression_table_size. Every node visited is counted in
   no_huff_entities.
     entnum:  index in huff_entities of the node to visit
     depth:   number of bits in the path from the root to this node
     prevbit: the last bit of that path (ignored at the root)
     bits:    the bit string of the parent node */
static void compress_makebits(int entnum, int depth, int prevbit,
    huffbitlist_t *bits)
{
    huffentity_t *ent = &(huff_entities[entnum]);
    char *cx;

    no_huff_entities++;
    ent->addr = compression_table_size;
    ent->depth = depth;
    ent->bits = *bits;

    /* The bit list is treated as MAXHUFFBYTES bytes long (that is how
       much compress_game_text() clears); a deeper path cannot be stored
       and must not be written beyond the end of the array. */
    if (depth > MAXHUFFBYTES * 8) {
        compiler_error("Huffman tree is too deep for the bit list.");
    }
    else if (depth > 0 && prevbit) {
        ent->bits.b[(depth-1) / 8] |= (1 << ((depth-1) % 8));
    }

    switch (ent->type) {
    case 0:
        /* Branch node: visit both subtrees. */
        compression_table_size += 9;
        compress_makebits(ent->u.branch[0], depth+1, 0, &ent->bits);
        compress_makebits(ent->u.branch[1], depth+1, 1, &ent->bits);
        break;
    case 1:
        /* String terminator */
        compression_table_size += 1;
        break;
    case 2:
        /* Single character */
        compression_table_size += 2;
        break;
    case 3:
        /* Abbreviation: stored with its text */
        cx = abbreviation_text(ent->u.val);
        compression_table_size += (1 + 1 + strlen(cx));
        break;
    case 4:
    case 9:
        /* Unicode character or dynamic string */
        compression_table_size += 5;
        break;
    default:
        break;
    }
}
/* ------------------------------------------------------------------------- */
/* The abbreviations optimiser */
/* */
/* This is a very complex, memory and time expensive algorithm to */
/* approximately solve the problem of which abbreviation strings would */
/* minimise the total number of Z-chars to which the game text translates. */
/* It is in some ways a quite separate program but remains inside Inform */
/* for compatibility with previous releases. */
/* ------------------------------------------------------------------------- */
2022-03-03 02:10:25 +02:00
/* The complete game text. */
static char *opttext;
static int32 opttextlen;

/* A three-letter block which occurs repeatedly in the game text. */
typedef struct tlb_s
{   char text[4];           /* The three characters, null-terminated */
    int32 intab;            /* Index in grandtable of its first position */
    int32 occurrences;      /* Number of positions recorded in grandtable */
} tlb;
static tlb *tlbtab; /* Three-letter blocks (allocated up to no_occs) */
static memory_list tlbtab_memlist;
static int32 no_occs;

static int32 *grandtable;   /* Positions in opttext, grouped by block */
static int32 *grandflags;   /* Working flags, one per occurrence of the
                               block currently being examined */

/* A candidate abbreviation. */
typedef struct optab_s
{   int32  length;          /* Number of characters in the candidate */
    int32  popularity;      /* Number of matches found */
    int32  score;
    int32  location;        /* Position of the candidate in opttext */
    char  *text; /* allocated to textsize, min 4 */
    int32  textsize;
} optab;
static int32 MAX_BESTYET;
static optab *bestyet; /* High-score entries (up to MAX_BESTYET used/allocated) */
static optab *bestyet2; /* The selected entries (up to selected used; allocated to MAX_ABBREVS) */
2019-02-05 02:44:07 +02:00
2024-02-12 01:08:33 +02:00
/* Copy the candidate *src into *dest, enlarging dest's text buffer
   first if it is too small to hold src's text and its terminator. */
static void optab_copy(optab *dest, const optab *src)
{
    int32 needed = src->length + 1;

    if (needed > dest->textsize) {
        int32 previous = dest->textsize;
        /* Grow to twice what is needed, to limit further reallocation. */
        dest->textsize = needed * 2;
        my_realloc(&dest->text, previous, dest->textsize, "bestyet2.text");
    }
    strcpy(dest->text, src->text);

    dest->location = src->location;
    dest->score = src->score;
    dest->popularity = src->popularity;
    dest->length = src->length;
}
2019-02-05 02:44:07 +02:00
static int pass_no ;
static void optimise_pass ( void )
2022-03-03 02:10:25 +02:00
{
TIMEVALUE t1 , t2 ;
float duration ;
int32 i ;
2019-02-05 02:44:07 +02:00
int32 j , j2 , k , nl , matches , noflags , score , min , minat = 0 , x , scrabble , c ;
2022-03-03 02:10:25 +02:00
for ( i = 0 ; i < MAX_BESTYET ; i + + ) bestyet [ i ] . length = 0 ;
2019-02-05 02:44:07 +02:00
for ( i = 0 ; i < no_occs ; i + + )
{ if ( ( * ( tlbtab [ i ] . text ) ! = ( int ) ' \n ' ) & & ( tlbtab [ i ] . occurrences ! = 0 ) )
{
# ifdef MAC_FACE
if ( i % ( ( * * g_pm_hndl ) . linespercheck ) = = 0 )
{ ProcessEvents ( & g_proc ) ;
if ( g_proc ! = true )
2022-03-03 02:10:25 +02:00
{ ao_free_arrays ( ) ;
2019-02-05 02:44:07 +02:00
longjmp ( g_fallback , 1 ) ;
}
}
# endif
2022-07-24 13:10:45 +03:00
if ( optabbrevs_trace_setting > = 2 ) {
printf ( " Pass %d, %4ld/%ld '%s' (%ld occurrences) " ,
pass_no , ( long int ) i , ( long int ) no_occs , tlbtab [ i ] . text ,
( long int ) tlbtab [ i ] . occurrences ) ;
}
2022-03-03 02:10:25 +02:00
TIMEVALUE_NOW ( & t1 ) ;
2019-02-05 02:44:07 +02:00
for ( j = 0 ; j < tlbtab [ i ] . occurrences ; j + + )
{ for ( j2 = 0 ; j2 < tlbtab [ i ] . occurrences ; j2 + + ) grandflags [ j2 ] = 1 ;
nl = 2 ; noflags = tlbtab [ i ] . occurrences ;
2024-02-12 01:08:33 +02:00
while ( noflags > = 2 )
2019-02-05 02:44:07 +02:00
{ nl + + ;
for ( j2 = 0 ; j2 < nl ; j2 + + )
2022-03-03 02:10:25 +02:00
if ( opttext [ grandtable [ tlbtab [ i ] . intab + j ] + j2 ] = = ' \n ' )
2019-02-05 02:44:07 +02:00
goto FinishEarly ;
matches = 0 ;
for ( j2 = j ; j2 < tlbtab [ i ] . occurrences ; j2 + + )
{ if ( grandflags [ j2 ] = = 1 )
{ x = grandtable [ tlbtab [ i ] . intab + j2 ]
- grandtable [ tlbtab [ i ] . intab + j ] ;
if ( ( ( x > - nl ) & & ( x < nl ) )
2022-03-03 02:10:25 +02:00
| | ( memcmp ( opttext + grandtable [ tlbtab [ i ] . intab + j ] ,
opttext + grandtable [ tlbtab [ i ] . intab + j2 ] ,
2019-02-05 02:44:07 +02:00
nl ) ! = 0 ) )
{ grandflags [ j2 ] = 0 ; noflags - - ; }
else matches + + ;
}
}
scrabble = 0 ;
for ( k = 0 ; k < nl ; k + + )
{ scrabble + + ;
2022-03-03 02:10:25 +02:00
c = opttext [ grandtable [ tlbtab [ i ] . intab + j + k ] ] ;
2019-02-05 02:44:07 +02:00
if ( c ! = ( int ) ' ' )
{ if ( iso_to_alphabet_grid [ c ] < 0 )
scrabble + = 2 ;
else
if ( iso_to_alphabet_grid [ c ] > = 26 )
scrabble + + ;
}
}
score = ( matches - 1 ) * ( scrabble - 2 ) ;
min = score ;
2022-03-03 02:10:25 +02:00
for ( j2 = 0 ; j2 < MAX_BESTYET ; j2 + + )
2019-02-05 02:44:07 +02:00
{ if ( ( nl = = bestyet [ j2 ] . length )
2022-03-03 02:10:25 +02:00
& & ( memcmp ( opttext + bestyet [ j2 ] . location ,
opttext + grandtable [ tlbtab [ i ] . intab + j ] ,
2019-02-05 02:44:07 +02:00
nl ) = = 0 ) )
2022-03-03 02:10:25 +02:00
{ j2 = MAX_BESTYET ; min = score ; }
2019-02-05 02:44:07 +02:00
else
{ if ( bestyet [ j2 ] . score < min )
{ min = bestyet [ j2 ] . score ; minat = j2 ;
}
}
}
if ( min ! = score )
{ bestyet [ minat ] . score = score ;
bestyet [ minat ] . length = nl ;
bestyet [ minat ] . location = grandtable [ tlbtab [ i ] . intab + j ] ;
bestyet [ minat ] . popularity = matches ;
}
}
FinishEarly : ;
}
2022-07-24 13:10:45 +03:00
if ( optabbrevs_trace_setting > = 2 ) {
TIMEVALUE_NOW ( & t2 ) ;
duration = TIMEVALUE_DIFFERENCE ( & t1 , & t2 ) ;
printf ( " (%.4f seconds) \n " , duration ) ;
}
2019-02-05 02:44:07 +02:00
}
}
}
/* Return 1 if s2 can be laid over s1 at some offset such that the two
   strings share at least one position and agree at every position they
   share (s2 may hang off either end of s1); otherwise return 0. */
static int any_overlap(char *s1, char *s2)
{
    int len1 = strlen(s1);
    int len2 = strlen(s2);
    int shift;

    /* shift is the index in s1 at which s2[0] is placed. */
    for (shift = 1 - len2; shift < len1; shift++) {
        int pos;
        int mismatch = 0;

        for (pos = 0; pos < len2; pos++) {
            int at = shift + pos;
            if (at < 0 || at >= len1)
                continue;           /* outside s1: nothing to compare */
            if (s1[at] != s2[pos]) {
                mismatch = 1;
                break;
            }
        }
        if (!mismatch)
            return 1;
    }
    return 0;
}
extern void optimise_abbreviations ( void )
2022-03-03 02:10:25 +02:00
{ int32 i , j , tcount , max = 0 , MAX_GTABLE ;
2019-02-05 02:44:07 +02:00
int32 j2 , selected , available , maxat = 0 , nl ;
2022-03-03 02:10:25 +02:00
if ( opttext = = NULL )
return ;
/* We insist that the first two abbreviations will be ". " and ", ". */
if ( MAX_ABBREVS < 2 )
return ;
/* Note that it's safe to access opttext[opttextlen+2]. There are
two newlines and a null beyond opttextlen . */
2019-02-05 02:44:07 +02:00
printf ( " Beginning calculation of optimal abbreviations... \n " ) ;
pass_no = 0 ;
2022-03-03 02:10:25 +02:00
initialise_memory_list ( & tlbtab_memlist ,
sizeof ( tlb ) , 1000 , ( void * * ) & tlbtab ,
" three-letter-blocks buffer " ) ;
no_occs = 0 ;
/* Not sure what the optimal size is for MAX_BESTYET. The original code always created 64 abbreviations and used MAX_BESTYET=256. I'm guessing that 4*MAX_ABBREVS is reasonable. */
MAX_BESTYET = 4 * MAX_ABBREVS ;
bestyet = my_calloc ( sizeof ( optab ) , MAX_BESTYET , " bestyet " ) ;
2024-02-12 01:08:33 +02:00
for ( i = 0 ; i < MAX_BESTYET ; i + + ) {
bestyet [ i ] . length = 0 ;
bestyet [ i ] . popularity = 0 ;
bestyet [ i ] . score = 0 ;
bestyet [ i ] . location = 0 ;
bestyet [ i ] . textsize = 4 ;
bestyet [ i ] . text = my_malloc ( bestyet [ i ] . textsize , " bestyet.text " ) ;
}
2022-03-03 02:10:25 +02:00
bestyet2 = my_calloc ( sizeof ( optab ) , MAX_ABBREVS , " bestyet2 " ) ;
2024-02-12 01:08:33 +02:00
for ( i = 0 ; i < MAX_ABBREVS ; i + + ) {
bestyet2 [ i ] . length = 0 ;
bestyet2 [ i ] . popularity = 0 ;
bestyet2 [ i ] . score = 0 ;
bestyet2 [ i ] . location = 0 ;
bestyet2 [ i ] . textsize = 4 ;
bestyet2 [ i ] . text = my_malloc ( bestyet2 [ i ] . textsize , " bestyet2.text " ) ;
}
2019-02-05 02:44:07 +02:00
bestyet2 [ 0 ] . text [ 0 ] = ' . ' ;
bestyet2 [ 0 ] . text [ 1 ] = ' ' ;
bestyet2 [ 0 ] . text [ 2 ] = 0 ;
bestyet2 [ 1 ] . text [ 0 ] = ' , ' ;
bestyet2 [ 1 ] . text [ 1 ] = ' ' ;
bestyet2 [ 1 ] . text [ 2 ] = 0 ;
2022-03-03 02:10:25 +02:00
selected = 2 ;
for ( i = 0 ; i < opttextlen ; i + + )
2019-02-05 02:44:07 +02:00
{
2022-03-03 02:10:25 +02:00
if ( ( opttext [ i ] = = ' . ' ) & & ( opttext [ i + 1 ] = = ' ' ) & & ( opttext [ i + 2 ] = = ' ' ) )
{ opttext [ i ] = ' \n ' ; opttext [ i + 1 ] = ' \n ' ; opttext [ i + 2 ] = ' \n ' ;
2019-02-05 02:44:07 +02:00
bestyet2 [ 0 ] . popularity + + ;
}
2022-03-03 02:10:25 +02:00
if ( ( opttext [ i ] = = ' . ' ) & & ( opttext [ i + 1 ] = = ' ' ) )
{ opttext [ i ] = ' \n ' ; opttext [ i + 1 ] = ' \n ' ;
2019-02-05 02:44:07 +02:00
bestyet2 [ 0 ] . popularity + + ;
}
2022-03-03 02:10:25 +02:00
if ( ( opttext [ i ] = = ' , ' ) & & ( opttext [ i + 1 ] = = ' ' ) )
{ opttext [ i ] = ' \n ' ; opttext [ i + 1 ] = ' \n ' ;
2019-02-05 02:44:07 +02:00
bestyet2 [ 1 ] . popularity + + ;
}
}
2022-03-03 02:10:25 +02:00
MAX_GTABLE = opttextlen + 1 ;
2019-02-05 02:44:07 +02:00
grandtable = my_calloc ( 4 * sizeof ( int32 ) , MAX_GTABLE / 4 , " grandtable " ) ;
2022-03-03 02:10:25 +02:00
for ( i = 0 , tcount = 0 ; i < opttextlen ; i + + )
{
tlb test ;
test . text [ 0 ] = opttext [ i ] ;
test . text [ 1 ] = opttext [ i + 1 ] ;
test . text [ 2 ] = opttext [ i + 2 ] ;
2019-02-05 02:44:07 +02:00
test . text [ 3 ] = 0 ;
if ( ( test . text [ 0 ] = = ' \n ' ) | | ( test . text [ 1 ] = = ' \n ' ) | | ( test . text [ 2 ] = = ' \n ' ) )
goto DontKeep ;
2022-03-03 02:10:25 +02:00
for ( j = 0 ; j < no_occs ; j + + ) {
2019-02-05 02:44:07 +02:00
if ( strcmp ( test . text , tlbtab [ j ] . text ) = = 0 )
goto DontKeep ;
2022-03-03 02:10:25 +02:00
}
2019-02-05 02:44:07 +02:00
test . occurrences = 0 ;
2022-03-03 02:10:25 +02:00
test . intab = 0 ;
for ( j = i + 3 ; j < opttextlen ; j + + )
2019-02-05 02:44:07 +02:00
{
# ifdef MAC_FACE
if ( j % ( ( * * g_pm_hndl ) . linespercheck ) = = 0 )
{ ProcessEvents ( & g_proc ) ;
if ( g_proc ! = true )
2022-03-03 02:10:25 +02:00
{ ao_free_arrays ( ) ;
2019-02-05 02:44:07 +02:00
longjmp ( g_fallback , 1 ) ;
}
}
# endif
2022-03-03 02:10:25 +02:00
if ( ( opttext [ i ] = = opttext [ j ] )
& & ( opttext [ i + 1 ] = = opttext [ j + 1 ] )
& & ( opttext [ i + 2 ] = = opttext [ j + 2 ] ) )
{ grandtable [ tcount + test . occurrences ] = j ;
2019-02-05 02:44:07 +02:00
test . occurrences + + ;
2022-03-03 02:10:25 +02:00
if ( tcount + test . occurrences = = MAX_GTABLE )
2019-02-05 02:44:07 +02:00
{ printf ( " All %ld cross-references used \n " ,
( long int ) MAX_GTABLE ) ;
goto Built ;
}
}
}
if ( test . occurrences > = 2 )
2022-03-03 02:10:25 +02:00
{
ensure_memory_list_available ( & tlbtab_memlist , no_occs + 1 ) ;
tlbtab [ no_occs ] = test ;
tlbtab [ no_occs ] . intab = tcount ;
tcount + = tlbtab [ no_occs ] . occurrences ;
2019-02-05 02:44:07 +02:00
if ( max < tlbtab [ no_occs ] . occurrences )
max = tlbtab [ no_occs ] . occurrences ;
no_occs + + ;
}
DontKeep : ;
}
Built :
grandflags = my_calloc ( sizeof ( int ) , max , " grandflags " ) ;
2022-07-24 13:10:45 +03:00
if ( optabbrevs_trace_setting > = 1 ) {
printf ( " Cross-reference table (%ld entries) built... \n " ,
( long int ) no_occs ) ;
}
2019-02-05 02:44:07 +02:00
/* for (i=0; i<no_occs; i++)
printf ( " %4d %4d '%s' %d \n " , i , tlbtab [ i ] . intab , tlbtab [ i ] . text ,
tlbtab [ i ] . occurrences ) ;
*/
2022-03-03 02:10:25 +02:00
for ( i = 0 ; i < MAX_ABBREVS ; i + + ) bestyet2 [ i ] . length = 0 ;
available = MAX_BESTYET ;
while ( ( available > 0 ) & & ( selected < MAX_ABBREVS ) )
2022-07-24 13:10:45 +03:00
{
pass_no + + ;
if ( optabbrevs_trace_setting > = 1 ) {
printf ( " Pass %d \n " , pass_no ) ;
}
2019-02-05 02:44:07 +02:00
optimise_pass ( ) ;
available = 0 ;
2022-03-03 02:10:25 +02:00
for ( i = 0 ; i < MAX_BESTYET ; i + + )
2019-02-05 02:44:07 +02:00
if ( bestyet [ i ] . score ! = 0 )
{ available + + ;
nl = bestyet [ i ] . length ;
2024-02-12 01:08:33 +02:00
if ( nl + 1 > bestyet [ i ] . textsize ) {
int32 oldsize = bestyet [ i ] . textsize ;
bestyet [ i ] . textsize = ( nl + 1 ) * 2 ;
my_realloc ( & bestyet [ i ] . text , oldsize , bestyet [ i ] . textsize , " bestyet.text " ) ;
}
2019-02-05 02:44:07 +02:00
for ( j2 = 0 ; j2 < nl ; j2 + + ) bestyet [ i ] . text [ j2 ] =
2022-03-03 02:10:25 +02:00
opttext [ bestyet [ i ] . location + j2 ] ;
2019-02-05 02:44:07 +02:00
bestyet [ i ] . text [ nl ] = 0 ;
}
/* printf("End of pass results:\n");
printf ( " \n no score freq string \n " ) ;
2022-03-03 02:10:25 +02:00
for ( i = 0 ; i < MAX_BESTYET ; i + + )
2019-02-05 02:44:07 +02:00
if ( bestyet [ i ] . score > 0 )
printf ( " %02d: %4d %4d '%s' \n " , i , bestyet [ i ] . score ,
bestyet [ i ] . popularity , bestyet [ i ] . text ) ;
*/
do
{ max = 0 ;
2022-03-03 02:10:25 +02:00
for ( i = 0 ; i < MAX_BESTYET ; i + + )
2019-02-05 02:44:07 +02:00
if ( max < bestyet [ i ] . score )
{ max = bestyet [ i ] . score ;
maxat = i ;
}
if ( max > 0 )
2022-03-03 02:10:25 +02:00
{
char testtext [ 4 ] ;
2024-02-12 01:08:33 +02:00
optab_copy ( & bestyet2 [ selected + + ] , & bestyet [ maxat ] ) ;
2019-02-05 02:44:07 +02:00
2022-07-24 13:10:45 +03:00
if ( optabbrevs_trace_setting > = 1 ) {
printf (
" Selection %2ld: '%s' (repeated %ld times, scoring %ld) \n " ,
( long int ) selected , bestyet [ maxat ] . text ,
( long int ) bestyet [ maxat ] . popularity ,
( long int ) bestyet [ maxat ] . score ) ;
}
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
testtext [ 0 ] = bestyet [ maxat ] . text [ 0 ] ;
testtext [ 1 ] = bestyet [ maxat ] . text [ 1 ] ;
testtext [ 2 ] = bestyet [ maxat ] . text [ 2 ] ;
testtext [ 3 ] = 0 ;
2019-02-05 02:44:07 +02:00
for ( i = 0 ; i < no_occs ; i + + )
2022-03-03 02:10:25 +02:00
if ( strcmp ( testtext , tlbtab [ i ] . text ) = = 0 )
2019-02-05 02:44:07 +02:00
break ;
for ( j = 0 ; j < tlbtab [ i ] . occurrences ; j + + )
{ if ( memcmp ( bestyet [ maxat ] . text ,
2022-03-03 02:10:25 +02:00
opttext + grandtable [ tlbtab [ i ] . intab + j ] ,
2019-02-05 02:44:07 +02:00
bestyet [ maxat ] . length ) = = 0 )
{ for ( j2 = 0 ; j2 < bestyet [ maxat ] . length ; j2 + + )
2022-03-03 02:10:25 +02:00
opttext [ grandtable [ tlbtab [ i ] . intab + j ] + j2 ] = ' \n ' ;
2019-02-05 02:44:07 +02:00
}
}
2022-03-03 02:10:25 +02:00
for ( i = 0 ; i < MAX_BESTYET ; i + + )
2019-02-05 02:44:07 +02:00
if ( ( bestyet [ i ] . score > 0 ) & &
( any_overlap ( bestyet [ maxat ] . text , bestyet [ i ] . text ) = = 1 ) )
{ bestyet [ i ] . score = 0 ;
/* printf("Discarding '%s' as overlapping\n",
bestyet [ i ] . text ) ; */
}
}
2022-03-03 02:10:25 +02:00
} while ( ( max > 0 ) & & ( available > 0 ) & & ( selected < MAX_ABBREVS ) ) ;
2019-02-05 02:44:07 +02:00
}
printf ( " \n Chosen abbreviations (in Inform syntax): \n \n " ) ;
for ( i = 0 ; i < selected ; i + + )
printf ( " Abbreviate \" %s \" ; \n " , bestyet2 [ i ] . text ) ;
text_free_arrays ( ) ;
}
/* ------------------------------------------------------------------------- */
/* The dictionary manager begins here. */
/* */
/* Speed is extremely important in these algorithms. If a linear-time */
/* routine were used to search the dictionary words so far built up, then */
/* Inform would crawl. */
/* */
/* Instead, the dictionary is stored as a binary tree, which is kept */
/* balanced with the red-black algorithm. */
/* ------------------------------------------------------------------------- */
/* A dictionary table similar to the Z-machine format is kept: there is a */
/* 7-byte header (left blank here to be filled in at the */
/* construct_storyfile() stage in "tables.c") and then a sequence of */
/* records, one per word, in the form */
/* */
/* <Z-coded text> <flags> <verbnumber> <adjectivenumber> */
/* 4 or 6 bytes byte byte byte */
/* */
2022-03-03 02:10:25 +02:00
/* For Glulx, the form is instead: (See below about Unicode-valued */
/* dictionaries and DICT_WORD_BYTES.) */
2019-02-05 02:44:07 +02:00
/* */
2022-03-03 02:10:25 +02:00
/* <tag> <plain text> <flags> <verbnumber> <adjectivenumber> */
/* $60 DICT_WORD_BYTES short short short */
2019-02-05 02:44:07 +02:00
/* */
/* These records are stored in "accession order" (i.e. in order of their */
/* first being received by these routines) and only alphabetically sorted */
/* by construct_storyfile() (using the array below). */
/* ------------------------------------------------------------------------- */
/* */
/* Further notes about the data fields... */
2024-02-27 00:51:15 +02:00
/* */
/* The flags in the first field are as defined in header.h */
/* (*_DFLAG values). */
2019-02-05 02:44:07 +02:00
/* */
/* In grammar version 2, the third field (adjectivenumber) is unused (and */
2024-02-27 00:51:15 +02:00
/* zero). It may be omitted entirely with the ZCODE_LESS_DICT_DATA option. */
2019-02-05 02:44:07 +02:00
/* */
/* The compiler generates special constants #dict_par1, #dict_par2, */
/* #dict_par3 to refer to the byte offsets of the three fields. In */
/* Z-code v3, these are 4/5/6; in v4+, they are 6/7/8. In Glulx, they */
/* are $DICT_WORD_SIZE+2/4/6, referring to the *low* bytes of the three */
/* fields. (The high bytes are $DICT_WORD_SIZE+1/3/5.) */
/* ------------------------------------------------------------------------- */
2022-03-03 02:10:25 +02:00
uchar * dictionary ; /* (These two variables are externally
2019-02-05 02:44:07 +02:00
used only in " tables.c " when
building the story - file ) */
2022-03-03 02:10:25 +02:00
static memory_list dictionary_memlist ;
int32 dictionary_top ; /* Position of the next free record
in dictionary ( i . e . , the current
number of bytes ) */
2019-02-05 02:44:07 +02:00
int dict_entries ; /* Total number of records entered */
/* ------------------------------------------------------------------------- */
2022-03-03 02:10:25 +02:00
/* dict_word was originally a typedef for a struct of 6 unsigned chars. */
/* It held the (4 or) 6 bytes of Z-coded text of a word. */
2019-02-05 02:44:07 +02:00
/* Usefully, because the PAD character 5 is < all alphabetic characters, */
/* alphabetic order corresponds to numeric order. For this reason, the */
/* dict_word is called the "sort code" of the original text word. */
/* */
2022-03-03 02:10:25 +02:00
/* In modifying the compiler for Glulx, I found it easier to discard the */
2019-02-05 02:44:07 +02:00
/* typedef, and operate directly on uchar arrays of length DICT_WORD_SIZE. */
/* In Z-code, DICT_WORD_SIZE will be 6, so the Z-code compiler will work */
2024-02-12 01:08:33 +02:00
/* as before. In Glulx, it can be any value. */
2019-02-05 02:44:07 +02:00
/* */
2024-02-12 01:08:33 +02:00
/* In further modifying the compiler to generate a Unicode dictionary, */
/* I have to store four-byte values in the uchar array. We make the array */
/* size DICT_WORD_BYTES (which is DICT_WORD_SIZE*DICT_CHAR_SIZE). */
2019-02-05 02:44:07 +02:00
/* Then we store the 32-bit character value big-endian. This lets us */
/* continue to compare arrays bytewise, which is a nice simplification. */
/* ------------------------------------------------------------------------- */
/* Compare two sort codes bytewise over DICT_WORD_BYTES bytes.
   Returns 0 if every byte matches; otherwise the difference
   (byte of d1) - (byte of d2) at the first position where they differ,
   so the sign gives the ordering. */
extern int compare_sorts(uchar *d1, uchar *d2)
{
    uchar *end = d1 + DICT_WORD_BYTES;

    /* A hand-written loop is used on purpose: the original author notes
       that memcmp(d1, d2, DICT_WORD_BYTES) runs into a bug on some Unix
       libraries. */
    while (d1 < end) {
        if (*d1 != *d2)
            return ((int) (*d1)) - ((int) (*d2));
        d1++;
        d2++;
    }
    return 0;
}
/* Copy the DICT_WORD_BYTES-byte sort code at d2 into d1 (front to back). */
extern void copy_sorts(uchar *d1, uchar *d2)
{
    int remaining = DICT_WORD_BYTES;

    while (remaining-- > 0)
        *d1++ = *d2++;
}
2024-02-12 01:08:33 +02:00
/* Results of the most recent dictionary_prepare() call. */

static memory_list prepared_sort_memlist;
static uchar *prepared_sort;        /* Holds the sort code of current word */

static int prepared_dictflags_pos;  /* Dict flags set by the current word
                                       (via a "//" suffix)                 */
static int prepared_dictflags_neg;  /* Dict flags *not* set by the word
                                       (suffix letters written after '~') */
2019-02-05 02:44:07 +02:00
/* Z-code form of dictionary_prepare().

   Translates the dictionary word "dword" into its Z-machine sort code,
   leaving the result in prepared_sort (and copying it to "optresult" if
   that is non-NULL). Any "//" suffix is parsed as dict flags: the letters
   p, s, n set PLURAL_DFLAG, SING_DFLAG, NOUN_DFLAG in
   prepared_dictflags_pos, or in prepared_dictflags_neg when preceded by
   '~'. Both flag variables are reset at the start of every call.

   The text is truncated to 6 (v3) or 9 (v4+) Z-characters and right-padded
   with the PAD character 5.                                                 */
static void dictionary_prepare_z(char *dword, uchar *optresult)
{   int i, j, k, k2, wd[13];
    int32 tot;
    int negflag;

    /* A rapid text translation algorithm using only the simplified rules
       applying to the text of dictionary entries: first produce a sequence
       of 6 (v3) or 9 (v4+) Z-characters                                     */

    int dictsize = (version_number==3) ? 6 : 9;

    prepared_dictflags_pos = 0;
    prepared_dictflags_neg = 0;

    /* i counts Z-characters written to wd[]; j indexes the source text. */
    for (i=0, j=0; dword[j]!=0; j++)
    {
        if ((dword[j] == '/') && (dword[j+1] == '/'))
        {
            /* The rest of the word is dict flags. Run through them. */
            negflag = FALSE;
            for (j+=2; dword[j] != 0; j++)
            {
                switch (dword[j])
                {
                    case '~':
                        if (!dword[j+1])
                            error_named("'//~' with no flag character (psn) in dict word", dword);
                        /* Applies to the next flag letter only */
                        negflag = !negflag;
                        break;
                    case 'p':
                        if (!negflag)
                            prepared_dictflags_pos |= PLURAL_DFLAG;
                        else
                            prepared_dictflags_neg |= PLURAL_DFLAG;
                        negflag = FALSE;
                        break;
                    case 's':
                        if (!negflag)
                            prepared_dictflags_pos |= SING_DFLAG;
                        else
                            prepared_dictflags_neg |= SING_DFLAG;
                        negflag = FALSE;
                        break;
                    case 'n':
                        if (!negflag)
                            prepared_dictflags_pos |= NOUN_DFLAG;
                        else
                            prepared_dictflags_neg |= NOUN_DFLAG;
                        negflag = FALSE;
                        break;
                    default:
                        error_named("Expected flag character (psn~) after '//' in dict word", dword);
                        break;
                }
            }
            break;
        }

        /* LONG_DICT_FLAG_BUG emulates the old behavior where we stop looping
           at dictsize. */
        if (LONG_DICT_FLAG_BUG && i>=dictsize)
            break;

        k=(int) dword[j];
        if (k==(int) '\'')
            warning_named("Obsolete usage: use the ^ character for the "
                "apostrophe in", dword);
        if (k==(int) '^') k=(int) '\'';
        if (k=='\"') k='~';

        if (k==(int) '@' || (character_set_unicode && (k & 0x80)))
        {   int unicode = text_to_unicode(dword+j);
            /* <ctype.h> functions are only defined for values representable
               as unsigned char (or EOF), hence the explicit range check. */
            if ((unicode >= 0) && (unicode < 128) && isupper(unicode))
                unicode = tolower(unicode);
            k = unicode_to_zscii(unicode);
            j += textual_form_length - 1;
            if ((k == 5) || (k >= 0x100))
            {   unicode_char_error(
                   "Character can be printed but not input:", unicode);
                k = '?';
            }
            k2 = zscii_to_alphabet_grid[(uchar) k];
        }
        else
        {   /* k came from a plain char and so may be negative where char
               is signed: pass it to <ctype.h> as an unsigned char value. */
            if (isupper((uchar) k)) k = tolower((uchar) k);
            k2 = iso_to_alphabet_grid[(uchar) k];
        }

        if (k2 < 0)
        {   if ((k2 == -5) || (k2 <= -0x100))
                char_error("Character can be printed but not input:", k);
            else
            {   /* Use 4 more Z-chars to encode a ZSCII escape sequence      */
                if (i<dictsize)
                    wd[i++] = 5;
                if (i<dictsize)
                    wd[i++] = 6;
                k2 = -k2;
                if (i<dictsize)
                    wd[i++] = k2/32;
                if (i<dictsize)
                    wd[i++] = k2%32;
            }
        }
        else
        {   alphabet_used[k2] = 'Y';
            if ((k2/26)!=0 && i<dictsize)
                wd[i++] = 3 + (k2/26);        /* Change alphabet for symbols */
            if (i<dictsize)
                wd[i++] = 6 + (k2%26);        /* Write the Z character       */
        }
    }

    if (i > dictsize)
        compiler_error("dict word buffer overflow");

    /* Fill up to the end of the dictionary block with PAD characters
       (for safety, we right-pad to 9 chars even in V3)                      */

    for (; i<9; i++) wd[i]=5;

    /* The array of Z-chars is converted to two or three 2-byte blocks       */
    ensure_memory_list_available(&prepared_sort_memlist, DICT_WORD_BYTES);

    tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10);
    prepared_sort[1]=tot%0x100;
    prepared_sort[0]=(tot/0x100)%0x100;
    tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10);
    prepared_sort[3]=tot%0x100;
    prepared_sort[2]=(tot/0x100)%0x100;
    if (version_number == 3)
        tot = 0;
    else
        tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);
    prepared_sort[5]=tot%0x100;
    prepared_sort[4]=(tot/0x100)%0x100;

    /* Set the "end bit" on the 2nd (in v3) or the 3rd (v4+) 2-byte block    */

    if (version_number==3) prepared_sort[2]+=0x80;
                      else prepared_sort[4]+=0x80;

    if (optresult) copy_sorts(optresult, prepared_sort);
}
/* Glulx form of dictionary_prepare().

   Translates the dictionary word "dword" into its Glulx sort code, leaving
   the result in prepared_sort (and copying it to "optresult" if that is
   non-NULL). The sort code is DICT_WORD_SIZE characters, each stored as one
   byte (DICT_CHAR_SIZE == 1) or as a big-endian four-byte value, and is
   right-padded with zeroes. Letters 'A'..'Z' are folded to lower case.

   Any "//" suffix is parsed as dict flags: the letters p, s, n set
   PLURAL_DFLAG, SING_DFLAG, NOUN_DFLAG in prepared_dictflags_pos, or in
   prepared_dictflags_neg when preceded by '~'. Both flag variables are
   reset at the start of every call.                                         */
static void dictionary_prepare_g(char *dword, uchar *optresult)
{
    int i, j, k;
    int32 unicode;
    int negflag;

    prepared_dictflags_pos = 0;
    prepared_dictflags_neg = 0;

    /* Size the buffer once, before anything is written to it. Doing this
       up front (rather than inside the loop) also covers an empty word or
       one consisting only of a "//" suffix, where the loop body never gets
       as far as storing a character but the zero-padding below still
       writes the whole buffer. */
    ensure_memory_list_available(&prepared_sort_memlist, DICT_WORD_BYTES);

    /* i counts characters stored; j indexes the source text. */
    for (i=0, j=0; (dword[j]!=0); j++) {
        if ((dword[j] == '/') && (dword[j+1] == '/')) {
            /* The rest of the word is dict flags. Run through them. */
            negflag = FALSE;
            for (j+=2; dword[j] != 0; j++) {
                switch (dword[j]) {
                    case '~':
                        if (!dword[j+1])
                            error_named("'//~' with no flag character (psn) in dict word", dword);
                        /* Applies to the next flag letter only */
                        negflag = !negflag;
                        break;
                    case 'p':
                        if (!negflag)
                            prepared_dictflags_pos |= PLURAL_DFLAG;
                        else
                            prepared_dictflags_neg |= PLURAL_DFLAG;
                        negflag = FALSE;
                        break;
                    case 's':
                        if (!negflag)
                            prepared_dictflags_pos |= SING_DFLAG;
                        else
                            prepared_dictflags_neg |= SING_DFLAG;
                        negflag = FALSE;
                        break;
                    case 'n':
                        if (!negflag)
                            prepared_dictflags_pos |= NOUN_DFLAG;
                        else
                            prepared_dictflags_neg |= NOUN_DFLAG;
                        negflag = FALSE;
                        break;
                    default:
                        error_named("Expected flag character (psn~) after '//' in dict word", dword);
                        break;
                }
            }
            break;
        }

        /* LONG_DICT_FLAG_BUG emulates the old behavior where we stop looping
           at DICT_WORD_SIZE. */
        if (LONG_DICT_FLAG_BUG && i>=DICT_WORD_SIZE)
            break;

        k = ((unsigned char *)dword)[j];
        if (k=='\'')
            warning_named("Obsolete usage: use the ^ character for the "
                "apostrophe in", dword);
        if (k=='^')
            k='\'';
        if (k=='~') /* as in iso_to_alphabet_grid */
            k='\"';

        if (k=='@' || (character_set_unicode && (k & 0x80))) {
            unicode = text_to_unicode(dword+j);
            j += textual_form_length - 1;
        }
        else {
            unicode = iso_to_unicode_grid[k];
        }

        if (DICT_CHAR_SIZE != 1 || (unicode >= 0 && unicode < 256)) {
            k = unicode;
        }
        else {
            error("The dictionary cannot contain Unicode characters beyond Latin-1. "
                "Define DICT_CHAR_SIZE=4 for a Unicode-compatible dictionary.");
            k = '?';
        }

        /* Fold ASCII capitals to lower case */
        if (k >= 'A' && k <= 'Z')
            k += ('a' - 'A');

        /* Characters beyond DICT_WORD_SIZE are silently dropped */
        if (DICT_CHAR_SIZE == 1) {
            if (i<DICT_WORD_SIZE)
                prepared_sort[i++] = k;
        }
        else {
            if (i<DICT_WORD_SIZE) {
                prepared_sort[4*i]   = (k >> 24) & 0xFF;
                prepared_sort[4*i+1] = (k >> 16) & 0xFF;
                prepared_sort[4*i+2] = (k >>  8) & 0xFF;
                prepared_sort[4*i+3] = (k)       & 0xFF;
                i++;
            }
        }
    }

    if (i > DICT_WORD_SIZE)
        compiler_error("dict word buffer overflow");

    /* Right-pad with zeroes */
    if (DICT_CHAR_SIZE == 1) {
        for (; i<DICT_WORD_SIZE; i++)
            prepared_sort[i] = 0;
    }
    else {
        for (; i<DICT_WORD_SIZE; i++) {
            prepared_sort[4*i]   = 0;
            prepared_sort[4*i+1] = 0;
            prepared_sort[4*i+2] = 0;
            prepared_sort[4*i+3] = 0;
        }
    }

    if (optresult) copy_sorts(optresult, prepared_sort);
}
/* Compute the sort code (and "//" dict flags) of "dword" for the current
   target, dispatching on glulx_mode. The sort code is left in
   prepared_sort; if "optresult" is non-NULL it also receives a copy. */
extern void dictionary_prepare(char *dword, uchar *optresult)
{
    if (glulx_mode) {
        dictionary_prepare_g(dword, optresult);
        return;
    }
    dictionary_prepare_z(dword, optresult);
}
/* ------------------------------------------------------------------------- */
/* The arrays below are all concerned with the problem of alphabetically */
/* sorting the dictionary during the compilation pass. */
/* Note that it is not enough simply to apply qsort to the dictionary at */
/* the end of the pass: we need to ensure that no duplicates are ever */
/* created. */
/* */
/* dict_sort_codes[n] the sort code of record n: i.e., of the nth */
/* word to be entered into the dictionary, where */
/* n counts upward from 0 */
/* (n is also called the "accession number") */
/* */
/* The tree structure encodes an ordering. The special value VACANT means */
/* "no node here": otherwise, node numbers are the same as accession */
/* numbers. At all times, "root" holds the node number of the top of the */
/* tree; each node has up to two branches, such that the subtree of the */
/* left branch is always alphabetically before what's at the node, and */
/* the subtree to the right is always after; and all branches are coloured */
/* either "black" or "red". These colours are used to detect points where */
/* the tree is growing asymmetrically (and therefore becoming inefficient */
/* to search). */
/* ------------------------------------------------------------------------- */
#define RED    'r'
#define BLACK  'b'
#define VACANT -1

/* Node number of the top of the tree, or VACANT while it is empty */
static int root;

typedef struct dict_tree_node_s
{
    int  branch[2];   /* Branch 0 is "left", 1 is "right"                    */
    char colour;      /* The colour (RED or BLACK) of the branch to the
                         parent                                              */
} dict_tree_node;

static dict_tree_node *dtree;        /* Allocated to dict_entries */
static memory_list dtree_memlist;

static uchar *dict_sort_codes;       /* Allocated to
                                        dict_entries*DICT_WORD_BYTES */
static memory_list dict_sort_codes_memlist;

int *final_dict_order;               /* Allocated at sort_dictionary() time */
2019-02-05 02:44:07 +02:00
static void dictionary_begin_pass ( void )
{
/* Leave room for the 7-byte header (added in "tables.c" much later) */
/* Glulx has a 4-byte header instead. */
if ( ! glulx_mode )
2022-03-03 02:10:25 +02:00
dictionary_top = 7 ;
2019-02-05 02:44:07 +02:00
else
2022-03-03 02:10:25 +02:00
dictionary_top = 4 ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
ensure_memory_list_available ( & dictionary_memlist , dictionary_top ) ;
2019-02-05 02:44:07 +02:00
root = VACANT ;
dict_entries = 0 ;
}
static int fdo_count ;
static void recursively_sort ( int node )
{ if ( dtree [ node ] . branch [ 0 ] ! = VACANT )
recursively_sort ( dtree [ node ] . branch [ 0 ] ) ;
final_dict_order [ node ] = fdo_count + + ;
if ( dtree [ node ] . branch [ 1 ] ! = VACANT )
recursively_sort ( dtree [ node ] . branch [ 1 ] ) ;
}
extern void sort_dictionary ( void )
2022-07-24 13:10:45 +03:00
{
2022-03-03 02:10:25 +02:00
final_dict_order = my_calloc ( sizeof ( int ) , dict_entries , " final dictionary ordering table " ) ;
2019-02-05 02:44:07 +02:00
if ( root ! = VACANT )
{ fdo_count = 0 ; recursively_sort ( root ) ;
}
}
/* ------------------------------------------------------------------------- */
/*   If "dword" is in the dictionary, return its accession number plus 1;    */
/*   if not, return 0.                                                       */
/* ------------------------------------------------------------------------- */
static int dictionary_find ( char * dword )
{ int at = root , n ;
dictionary_prepare ( dword , NULL ) ;
while ( at ! = VACANT )
{ n = compare_sorts ( prepared_sort , dict_sort_codes + at * DICT_WORD_BYTES ) ;
if ( n = = 0 ) return at + 1 ;
if ( n > 0 ) at = dtree [ at ] . branch [ 1 ] ; else at = dtree [ at ] . branch [ 0 ] ;
}
return 0 ;
}
/* ------------------------------------------------------------------------- */
2024-02-12 01:08:33 +02:00
/* Add "dword" to the dictionary with (flag1,flag2,flag3) as its data */
/* fields; unless it already exists, in which case OR the data fields with */
/* those flags. */
2019-02-05 02:44:07 +02:00
/* */
/* These fields are one byte each in Z-code, two bytes each in Glulx. */
/* */
/* Returns: the accession number. */
/* ------------------------------------------------------------------------- */
2024-02-12 01:08:33 +02:00
/* dictionary_add(): enter "dword" into the dictionary.

   The word's sort code and "//" dict flags are computed first; flag1 is
   then adjusted by them (negated flags cleared, positive flags set, and
   optionally SING_DFLAG implied -- see DICT_IMPLICIT_SINGULAR below).

   The tree is then searched from the root. If an entry with the same sort
   code exists, the three flag values are ORed into its record and its
   accession number is returned. Otherwise a new record is appended (as
   record number dict_entries), linked into the tree, and its accession
   number is returned. Branch colours are adjusted, and subtrees rehung,
   on the way down.

   Locals: "at" is the node being examined; f, gf, ggf are the nodes
   visited one, two and three steps before it (its ancestors on the search
   path); fr, gfr, ggfr record which branch (0 or 1) was taken out of each
   of those; r is the branch about to be taken out of "at".                  */
extern int dictionary_add ( char * dword , int flag1 , int flag2 , int flag3 )
2019-02-05 02:44:07 +02:00
{ int n ; uchar * p ;
int ggfr = 0 , gfr = 0 , fr = 0 , r = 0 ;
int ggf = VACANT , gf = VACANT , f = VACANT , at = root ;
int a , b ;
/* res: byte length of the encoded text in a Z-code record */
int res = ( ( version_number = = 3 ) ? 4 : 6 ) ;
2024-02-12 01:08:33 +02:00
/* Fill in prepared_sort and prepared_dictflags. */
2019-02-05 02:44:07 +02:00
dictionary_prepare ( dword , NULL ) ;
2024-02-12 01:08:33 +02:00
/* Adjust flag1 according to prepared_dictflags. */
flag1 & = ( ~ prepared_dictflags_neg ) ;
flag1 | = prepared_dictflags_pos ;
2024-02-27 00:51:15 +02:00
if ( DICT_IMPLICIT_SINGULAR ) {
/* If we have //n but not //p, that implies //s. Unless //s is
explicitly forbidden . */
if ( ( flag1 & NOUN_DFLAG ) & & ! ( flag1 & PLURAL_DFLAG ) & & ! ( prepared_dictflags_neg & SING_DFLAG ) ) {
flag1 | = SING_DFLAG ;
}
}
2019-02-05 02:44:07 +02:00
/* Empty tree: the new word becomes the root */
if ( root = = VACANT )
{ root = 0 ; goto CreateEntry ;
}
while ( TRUE )
{
n = compare_sorts ( prepared_sort , dict_sort_codes + at * DICT_WORD_BYTES ) ;
if ( n = = 0 )
{
/* Word already present: merge the new flags into its record */
if ( ! glulx_mode ) {
2022-03-03 02:10:25 +02:00
p = dictionary + 7 + at * DICT_ENTRY_BYTE_LENGTH + res ;
2024-02-12 01:08:33 +02:00
p [ 0 ] | = flag1 ; p [ 1 ] | = flag2 ;
2022-03-03 02:10:25 +02:00
if ( ! ZCODE_LESS_DICT_DATA )
2024-02-12 01:08:33 +02:00
p [ 2 ] | = flag3 ;
2019-02-05 02:44:07 +02:00
}
else {
/* Glulx fields are two bytes each, high byte first */
p = dictionary + 4 + at * DICT_ENTRY_BYTE_LENGTH + DICT_ENTRY_FLAG_POS ;
2024-02-12 01:08:33 +02:00
p [ 0 ] | = ( flag1 / 256 ) ; p [ 1 ] | = ( flag1 % 256 ) ;
p [ 2 ] | = ( flag2 / 256 ) ; p [ 3 ] | = ( flag2 % 256 ) ;
p [ 4 ] | = ( flag3 / 256 ) ; p [ 5 ] | = ( flag3 % 256 ) ;
2019-02-05 02:44:07 +02:00
}
return at ;
}
/* r: which branch of "at" the new word belongs under */
if ( n > 0 ) r = 1 ; else r = 0 ;
a = dtree [ at ] . branch [ 0 ] ; b = dtree [ at ] . branch [ 1 ] ;
/* Both children red: recolour them black and "at" red */
if ( ( a ! = VACANT ) & & ( dtree [ a ] . colour = = RED ) & &
( b ! = VACANT ) & & ( dtree [ b ] . colour = = RED ) )
{ dtree [ a ] . colour = BLACK ;
dtree [ b ] . colour = BLACK ;
dtree [ at ] . colour = RED ;
/* A tree rotation may be needed to avoid two red links in a row:
e . g .
ggf ( or else gf is root ) ggf ( or f is root )
| |
gf f
/ \ ( red ) / \ ( both red )
f becomes gf at
/ \ ( red ) / \ / \
at
/ \
In effect we rehang the " gf " subtree from " f " .
See the Technical Manual for further details .
*/
if ( ( f ! = VACANT ) & & ( gf ! = VACANT ) & & ( dtree [ f ] . colour = = RED ) )
{
/* fr == gfr: "at" and f hang the same way; f takes gf's place */
if ( fr = = gfr )
{ if ( ggf = = VACANT ) root = f ; else dtree [ ggf ] . branch [ ggfr ] = f ;
dtree [ gf ] . branch [ gfr ] = dtree [ f ] . branch [ 1 - fr ] ;
dtree [ f ] . branch [ 1 - fr ] = gf ;
dtree [ f ] . colour = BLACK ;
dtree [ gf ] . colour = RED ;
gf = ggf ; gfr = ggfr ;
}
else
/* Opposite directions: "at" takes gf's place, with f and gf as
   its children; the search position is then re-established
   (n is reused here as a temporary node number) */
{ if ( ggf = = VACANT ) root = at ; else dtree [ ggf ] . branch [ ggfr ] = at ;
dtree [ at ] . colour = BLACK ;
dtree [ gf ] . colour = RED ;
dtree [ f ] . branch [ fr ] = dtree [ at ] . branch [ gfr ] ;
dtree [ gf ] . branch [ gfr ] = dtree [ at ] . branch [ fr ] ;
dtree [ at ] . branch [ gfr ] = f ;
dtree [ at ] . branch [ fr ] = gf ;
r = 1 - r ; n = at ; if ( r = = fr ) at = f ; else at = gf ;
f = n ; gf = ggf ; fr = 1 - r ; gfr = ggfr ;
}
}
}
/* Reached a free slot: the new word will hang from branch r of "at" */
if ( dtree [ at ] . branch [ r ] = = VACANT )
{ dtree [ at ] . colour = RED ;
/* Same pair of rehanging cases as above */
if ( ( f ! = VACANT ) & & ( gf ! = VACANT ) & & ( dtree [ f ] . colour = = RED ) )
{ if ( fr = = gfr )
{ if ( ggf = = VACANT ) root = f ; else dtree [ ggf ] . branch [ ggfr ] = f ;
dtree [ gf ] . branch [ gfr ] = dtree [ f ] . branch [ 1 - fr ] ;
dtree [ f ] . branch [ 1 - fr ] = gf ;
dtree [ f ] . colour = BLACK ;
dtree [ gf ] . colour = RED ;
}
else
{ if ( ggf = = VACANT ) root = at ; else dtree [ ggf ] . branch [ ggfr ] = at ;
dtree [ at ] . colour = BLACK ;
dtree [ gf ] . colour = RED ;
dtree [ f ] . branch [ fr ] = dtree [ at ] . branch [ gfr ] ;
dtree [ gf ] . branch [ gfr ] = dtree [ at ] . branch [ fr ] ;
dtree [ at ] . branch [ gfr ] = f ;
dtree [ at ] . branch [ fr ] = gf ;
r = 1 - r ; n = at ; if ( r = = fr ) at = f ; else at = gf ;
f = n ; gf = ggf ;
}
}
/* The new node's number is the next accession number */
dtree [ at ] . branch [ r ] = dict_entries ;
goto CreateEntry ;
}
/* Step down one level, shifting the ancestor records along */
ggf = gf ; gf = f ; f = at ; at = dtree [ at ] . branch [ r ] ;
ggfr = gfr ; gfr = fr ; fr = r ;
}
CreateEntry :
2022-03-03 02:10:25 +02:00
ensure_memory_list_available ( & dtree_memlist , dict_entries + 1 ) ;
ensure_memory_list_available ( & dict_sort_codes_memlist , ( dict_entries + 1 ) * DICT_WORD_BYTES ) ;
2019-02-05 02:44:07 +02:00
dtree [ dict_entries ] . branch [ 0 ] = VACANT ;
dtree [ dict_entries ] . branch [ 1 ] = VACANT ;
dtree [ dict_entries ] . colour = BLACK ;
/* Address in Inform's own dictionary table to write the record to */
if ( ! glulx_mode ) {
2022-03-03 02:10:25 +02:00
ensure_memory_list_available ( & dictionary_memlist , dictionary_top + DICT_ENTRY_BYTE_LENGTH ) ;
p = dictionary + DICT_ENTRY_BYTE_LENGTH * dict_entries + 7 ;
2019-02-05 02:44:07 +02:00
/* So copy in the 4 (or 6) bytes of Z-coded text and the 3 data
bytes */
p [ 0 ] = prepared_sort [ 0 ] ; p [ 1 ] = prepared_sort [ 1 ] ;
p [ 2 ] = prepared_sort [ 2 ] ; p [ 3 ] = prepared_sort [ 3 ] ;
if ( version_number > 3 )
{ p [ 4 ] = prepared_sort [ 4 ] ; p [ 5 ] = prepared_sort [ 5 ] ; }
2024-02-12 01:08:33 +02:00
p [ res ] = flag1 ; p [ res + 1 ] = flag2 ;
if ( ! ZCODE_LESS_DICT_DATA ) p [ res + 2 ] = flag3 ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
dictionary_top + = DICT_ENTRY_BYTE_LENGTH ;
2019-02-05 02:44:07 +02:00
}
else {
int i ;
2022-03-03 02:10:25 +02:00
ensure_memory_list_available ( & dictionary_memlist , dictionary_top + DICT_ENTRY_BYTE_LENGTH ) ;
2019-02-05 02:44:07 +02:00
p = dictionary + 4 + DICT_ENTRY_BYTE_LENGTH * dict_entries ;
p [ 0 ] = 0x60 ; /* type byte -- dict word */
/* The text starts DICT_CHAR_SIZE bytes into the record */
p + = DICT_CHAR_SIZE ;
for ( i = 0 ; i < DICT_WORD_BYTES ; i + + )
p [ i ] = prepared_sort [ i ] ;
p + = DICT_WORD_BYTES ;
2024-02-12 01:08:33 +02:00
p [ 0 ] = ( flag1 / 256 ) ; p [ 1 ] = ( flag1 % 256 ) ;
p [ 2 ] = ( flag2 / 256 ) ; p [ 3 ] = ( flag2 % 256 ) ;
p [ 4 ] = ( flag3 / 256 ) ; p [ 5 ] = ( flag3 % 256 ) ;
2019-02-05 02:44:07 +02:00
dictionary_top + = DICT_ENTRY_BYTE_LENGTH ;
}
/* Remember the sort code so later words can be compared against it */
copy_sorts ( dict_sort_codes + dict_entries * DICT_WORD_BYTES , prepared_sort ) ;
return dict_entries + + ;
}
/* ------------------------------------------------------------------------- */
/* Used in "tables.c" for "Extend ... only", to renumber a verb-word to a */
/* new verb syntax of its own. (Otherwise existing verb-words never */
/* change their verb-numbers.) */
/* ------------------------------------------------------------------------- */
/* Overwrite the verb-number field of the dictionary record for "dword"
   with "to". Does nothing if the word is not in the dictionary. */
extern void dictionary_set_verb_number(char *dword, int to)
{
    uchar *fields;
    int found = dictionary_find(dword);   /* accession number plus 1 */

    if (found == 0)
        return;

    if (glulx_mode) {
        /* Two-byte field, high byte first */
        fields = dictionary + 4 + (found-1)*DICT_ENTRY_BYTE_LENGTH
            + DICT_ENTRY_FLAG_POS;
        fields[2] = to/256;
        fields[3] = to%256;
    }
    else {
        /* One-byte field, following the 4 (v3) or 6 bytes of text and the
           flags byte */
        int res = ((version_number==3)?4:6);
        fields = dictionary + 7 + (found-1)*DICT_ENTRY_BYTE_LENGTH + res;
        fields[1] = to;
    }
}
/* ------------------------------------------------------------------------- */
2022-07-24 13:10:45 +03:00
/* Tracing code for the dictionary: used by "trace" and text */
/* transcription. */
2019-02-05 02:44:07 +02:00
/* ------------------------------------------------------------------------- */
2022-03-03 02:10:25 +02:00
/* In the dictionary-showing code, if d_show_buf is NULL, the text is
   printed directly. (The "Trace dictionary" directive does this.)
   If d_show_buf is not NULL, we add words to it (reallocing if necessary)
   until it's a page-width.
*/
static char *d_show_buf = NULL;
static int d_show_size; /* allocated size */
static int d_show_len;  /* current length */

/* Emit one character of dictionary display: to stdout when there is no
   display buffer, otherwise appended to the buffer (which is kept
   NUL-terminated and grown when fewer than two spare bytes remain). */
static void show_char(char c)
{
    if (d_show_buf == NULL) {
        printf("%c", c);
        return;
    }

    if (d_show_len + 2 >= d_show_size) {
        int grown = 2*d_show_len + 16;
        my_realloc(&d_show_buf, d_show_size, grown,
            "dictionary display buffer");
        d_show_size = grown;
    }

    d_show_buf[d_show_len] = c;
    d_show_len++;
    d_show_buf[d_show_len] = '\0';
}
/* Display a Unicode character in user-readable form. This uses the same
   character encoding as the source code:
     - values below 0x80 are shown as-is;
     - if character_set_unicode is set, the character is UTF-8 encoded
       ('?' is shown for values of 0x200000 and above);
     - if character_set_setting is 1, values below 0x100 are shown as a
       single byte;
     - anything else is shown in the escaped form @{hex}.
   Output goes through show_char(). */
static void show_uchar(uint32 c)
{
    char buf[16];
    int ix;

    if (c < 0x80) {
        /* ASCII always works */
        show_char((char) c);
        return;
    }

    if (character_set_unicode) {
        /* UTF-8 the character (c is known to be at least 0x80 here) */
        if (c < 0x800) {
            show_char((char) (0xC0 | ((c & 0x7C0) >> 6)));
            show_char((char) (0x80 |  (c & 0x03F)      ));
        }
        else if (c < 0x10000) {
            show_char((char) (0xE0 | ((c & 0xF000) >> 12)));
            show_char((char) (0x80 | ((c & 0x0FC0) >>  6)));
            show_char((char) (0x80 |  (c & 0x003F)       ));
        }
        else if (c < 0x200000) {
            show_char((char) (0xF0 | ((c & 0x1C0000) >> 18)));
            show_char((char) (0x80 | ((c & 0x03F000) >> 12)));
            show_char((char) (0x80 | ((c & 0x000FC0) >>  6)));
            show_char((char) (0x80 |  (c & 0x00003F)       ));
        }
        else {
            show_char('?');
        }
        return;
    }

    if (character_set_setting == 1 && c < 0x100) {
        /* Fits in Latin-1 */
        show_char((char) c);
        return;
    }

    /* Supporting other character_set_setting is harder; not currently
       implemented. */

    /* Use the escaped form. At most "@{" + 8 hex digits + "}" + NUL = 12
       bytes, which fits buf. The cast makes the argument match %x whatever
       type uint32 is declared as. */
    sprintf(buf, "@{%x}", (unsigned int) c);
    for (ix=0; buf[ix]; ix++)
        show_char(buf[ix]);
}
/* Decode the Z-coded text of a dictionary record into readable text.

   p        points to the 4 (v3) or 6 (v4+) bytes of encoded text;
   results  receives the NUL-terminated text. The caller supplies the
            buffer; the callers visible in this file pass char[32].

   Each 2-byte block holds three 5-bit Z-characters. Z-char 4 and 5 select
   alphabet 1 or 2 for the next character; in alphabet 2, Z-char 6
   introduces a two-Z-char ZSCII escape. */
extern void word_to_ascii(uchar *p, char *results)
{   int i, shift, cc, zchar; uchar encoded_word[9];
    int nchars = (version_number == 3) ? 6 : 9;

    encoded_word[0] = (((int) p[0])&0x7c)/4;
    encoded_word[1] = 8*(((int) p[0])&0x3) + (((int) p[1])&0xe0)/32;
    encoded_word[2] = ((int) p[1])&0x1f;
    encoded_word[3] = (((int) p[2])&0x7c)/4;
    encoded_word[4] = 8*(((int) p[2])&0x3) + (((int) p[3])&0xe0)/32;
    encoded_word[5] = ((int) p[3])&0x1f;
    if (version_number > 3)
    {   encoded_word[6] = (((int) p[4])&0x7c)/4;
        encoded_word[7] = 8*(((int) p[4])&0x3) + (((int) p[5])&0xe0)/32;
        encoded_word[8] = ((int) p[5])&0x1f;
    }
    else
    {
        encoded_word[6] = encoded_word[7] = encoded_word[8] = 0;
    }

    shift = 0; cc = 0;
    for (i=0; i<nchars; i++)
    {   zchar = encoded_word[i];

        if (zchar == 4) shift = 1;
        else
        if (zchar == 5) shift = 2;
        else
        {   if ((shift == 2) && (zchar == 6))
            {   /* A ZSCII escape needs two more Z-chars. If the word was
                   truncated in the middle of one, they are not there:
                   stop rather than read past the end of encoded_word. */
                if (i+2 >= (int) (sizeof encoded_word / sizeof encoded_word[0]))
                    break;
                zchar = 32*encoded_word[i+1] + encoded_word[i+2];
                i += 2;
                if ((zchar>=32) && (zchar<=126))
                    results[cc++] = zchar;
                else
                {   zscii_to_text(results+cc, zchar);
                    cc = strlen(results);
                }
            }
            else
            {   zscii_to_text(results+cc, (alphabet[shift])[zchar-6]);
                cc = strlen(results);
            }
            shift = 0;
        }
    }
    results[cc] = 0;
}
2022-03-03 02:10:25 +02:00
/* Print a dictionary word to stdout.
( This assumes that d_show_buf is null . )
*/
void print_dict_word ( int node )
{
uchar * p ;
int cprinted ;
if ( ! glulx_mode ) {
char textual_form [ 32 ] ;
p = ( uchar * ) dictionary + 7 + DICT_ENTRY_BYTE_LENGTH * node ;
word_to_ascii ( p , textual_form ) ;
for ( cprinted = 0 ; textual_form [ cprinted ] ! = 0 ; cprinted + + )
show_char ( textual_form [ cprinted ] ) ;
}
else {
p = ( uchar * ) dictionary + 4 + DICT_ENTRY_BYTE_LENGTH * node ;
for ( cprinted = 0 ; cprinted < DICT_WORD_SIZE ; cprinted + + )
{
uint32 ch ;
if ( DICT_CHAR_SIZE = = 1 )
ch = p [ 1 + cprinted ] ;
else
ch = ( p [ 4 * cprinted + 4 ] < < 24 ) + ( p [ 4 * cprinted + 5 ] < < 16 ) + ( p [ 4 * cprinted + 6 ] < < 8 ) + ( p [ 4 * cprinted + 7 ] ) ;
if ( ! ch )
break ;
show_uchar ( ch ) ;
}
}
}
2022-07-24 13:10:45 +03:00
/* Z-code dictionary display: in-order walk of the subtree at "node"
   (left branch, this word, right branch).

   Each word is decoded with word_to_ascii() and emitted through
   show_char(), then padded with spaces to 4 columns more than the
   Z-character count (6 in v3, 9 otherwise).

   level >= 1 (only when d_show_buf is NULL, i.e. printing directly):
              also print the word's dict flags and verb/preposition numbers.
   level >= 2: additionally print the bytes of the record in hex first.

   When d_show_buf is in use, the buffer is written to the transcript file
   once it holds at least 64 characters, or after every word if
   TRANSCRIPT_FORMAT is 1. */
static void recursively_show_z ( int node , int level )
2019-02-05 02:44:07 +02:00
{ int i , cprinted , flags ; uchar * p ;
char textual_form [ 32 ] ;
2022-03-03 02:10:25 +02:00
int res = ( version_number = = 3 ) ? 4 : 6 ; /* byte length of encoded text */
2019-02-05 02:44:07 +02:00
if ( dtree [ node ] . branch [ 0 ] ! = VACANT )
2022-07-24 13:10:45 +03:00
recursively_show_z ( dtree [ node ] . branch [ 0 ] , level ) ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
/* Records follow the 7-byte header, in accession order */
p = ( uchar * ) dictionary + 7 + DICT_ENTRY_BYTE_LENGTH * node ;
2019-02-05 02:44:07 +02:00
word_to_ascii ( p , textual_form ) ;
for ( cprinted = 0 ; textual_form [ cprinted ] ! = 0 ; cprinted + + )
show_char ( textual_form [ cprinted ] ) ;
for ( ; cprinted < 4 + ( ( version_number = = 3 ) ? 6 : 9 ) ; cprinted + + )
show_char ( ' ' ) ;
2022-07-24 13:10:45 +03:00
/* The level-1 info can only be printfed (d_show_buf must be null). */
if ( d_show_buf = = NULL & & level > = 1 )
{
if ( level > = 2 ) {
for ( i = 0 ; i < DICT_ENTRY_BYTE_LENGTH ; i + + ) printf ( " %02x " , p [ i ] ) ;
}
2019-02-05 02:44:07 +02:00
/* The three data bytes follow the encoded text: p[res] is the flags
   byte; p[res+1] is printed as the verb number and p[res+2] as the
   preposition number */
flags = ( int ) p [ res ] ;
2024-02-27 00:51:15 +02:00
if ( flags & NOUN_DFLAG )
2024-02-12 01:08:33 +02:00
printf ( " noun " ) ;
else
printf ( " " ) ;
2024-02-27 00:51:15 +02:00
if ( flags & PLURAL_DFLAG )
2024-02-12 01:08:33 +02:00
printf ( " p " ) ;
else
printf ( " " ) ;
2024-02-27 00:51:15 +02:00
if ( flags & SING_DFLAG )
printf ( " s " ) ;
else
printf ( " " ) ;
if ( flags & PREP_DFLAG )
2019-02-05 02:44:07 +02:00
{ if ( grammar_version_number = = 1 )
printf ( " preposition:%d " , ( int ) p [ res + 2 ] ) ;
else
printf ( " preposition " ) ;
}
2024-02-27 00:51:15 +02:00
/* "metaverb" requires every bit of METAVERB_DFLAG to be set; "verb"
   is reported when the masked value equals VERB_DFLAG exactly */
if ( ( flags & METAVERB_DFLAG ) = = METAVERB_DFLAG )
printf ( " metaverb:%d " , ( int ) p [ res + 1 ] ) ;
else if ( ( flags & METAVERB_DFLAG ) = = VERB_DFLAG )
printf ( " verb:%d " , ( int ) p [ res + 1 ] ) ;
2019-02-05 02:44:07 +02:00
printf ( " \n " ) ;
}
2022-03-03 02:10:25 +02:00
/* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
if ( d_show_buf & & ( d_show_len > = 64 | | TRANSCRIPT_FORMAT = = 1 ) )
{
write_to_transcript_file ( d_show_buf , STRCTX_DICT ) ;
d_show_len = 0 ;
2019-02-05 02:44:07 +02:00
}
if ( dtree [ node ] . branch [ 1 ] ! = VACANT )
2022-07-24 13:10:45 +03:00
recursively_show_z ( dtree [ node ] . branch [ 1 ] , level ) ;
2019-02-05 02:44:07 +02:00
}
2022-07-24 13:10:45 +03:00
/* Glulx dictionary display: in-order walk of the subtree at "node"
   (left branch, this word, right branch).

   The word's characters are read straight from its record (one byte each,
   or big-endian four-byte values, depending on DICT_CHAR_SIZE), stopping at
   the first zero, and emitted through show_uchar(); the word is then padded
   with spaces to DICT_WORD_SIZE+4 columns.

   level >= 1 (only when d_show_buf is NULL, i.e. printing directly):
              also print the word's dict flags and verb number.
   level >= 2: additionally print the bytes of the record in hex first.

   When d_show_buf is in use, the buffer is written to the transcript file
   once it holds at least 64 characters, or after every word if
   TRANSCRIPT_FORMAT is 1. */
static void recursively_show_g ( int node , int level )
2022-03-03 02:10:25 +02:00
{ int i , cprinted ;
uchar * p ;
if ( dtree [ node ] . branch [ 0 ] ! = VACANT )
2022-07-24 13:10:45 +03:00
recursively_show_g ( dtree [ node ] . branch [ 0 ] , level ) ;
2022-03-03 02:10:25 +02:00
/* Records follow the 4-byte header, in accession order */
p = ( uchar * ) dictionary + 4 + DICT_ENTRY_BYTE_LENGTH * node ;
for ( cprinted = 0 ; cprinted < DICT_WORD_SIZE ; cprinted + + )
{
uint32 ch ;
if ( DICT_CHAR_SIZE = = 1 )
ch = p [ 1 + cprinted ] ;
else
/* NOTE(review): each p[] byte is promoted to int before the shift, so a
   top byte of 0x80 or more would overflow in "<< 24" -- confirm such
   values cannot be stored, or widen to uint32 first */
ch = ( p [ 4 * cprinted + 4 ] < < 24 ) + ( p [ 4 * cprinted + 5 ] < < 16 ) + ( p [ 4 * cprinted + 6 ] < < 8 ) + ( p [ 4 * cprinted + 7 ] ) ;
if ( ! ch )
break ;
show_uchar ( ch ) ;
}
for ( ; cprinted < DICT_WORD_SIZE + 4 ; cprinted + + )
show_char ( ' ' ) ;
2022-07-24 13:10:45 +03:00
/* The level-1 info can only be printfed (d_show_buf must be null). */
if ( d_show_buf = = NULL & & level > = 1 )
2022-03-03 02:10:25 +02:00
/* flagpos: offset of the first data field within the record; each field
   is two bytes, high byte first */
{ int flagpos = ( DICT_CHAR_SIZE = = 1 ) ? ( DICT_WORD_SIZE + 1 ) : ( DICT_WORD_BYTES + 4 ) ;
int flags = ( p [ flagpos + 0 ] < < 8 ) | ( p [ flagpos + 1 ] ) ;
int verbnum = ( p [ flagpos + 2 ] < < 8 ) | ( p [ flagpos + 3 ] ) ;
2022-07-24 13:10:45 +03:00
if ( level > = 2 ) {
for ( i = 0 ; i < DICT_ENTRY_BYTE_LENGTH ; i + + ) printf ( " %02x " , p [ i ] ) ;
}
2024-02-27 00:51:15 +02:00
if ( flags & NOUN_DFLAG )
2024-02-12 01:08:33 +02:00
printf ( " noun " ) ;
else
printf ( " " ) ;
2024-02-27 00:51:15 +02:00
if ( flags & PLURAL_DFLAG )
2024-02-12 01:08:33 +02:00
printf ( " p " ) ;
else
printf ( " " ) ;
2024-02-27 00:51:15 +02:00
if ( flags & SING_DFLAG )
printf ( " s " ) ;
else
printf ( " " ) ;
if ( flags & PREP_DFLAG )
printf ( " preposition " ) ;
/* "metaverb" requires every bit of METAVERB_DFLAG to be set; "verb"
   is reported when the masked value equals VERB_DFLAG exactly */
if ( ( flags & METAVERB_DFLAG ) = = METAVERB_DFLAG )
printf ( " metaverb:%d " , verbnum ) ;
else if ( ( flags & METAVERB_DFLAG ) = = VERB_DFLAG )
printf ( " verb:%d " , verbnum ) ;
2022-03-03 02:10:25 +02:00
printf ( " \n " ) ;
}
/* Show five words per line in classic TRANSCRIPT_FORMAT; one per line in the new format. */
if ( d_show_buf & & ( d_show_len > = 64 | | TRANSCRIPT_FORMAT = = 1 ) )
{
write_to_transcript_file ( d_show_buf , STRCTX_DICT ) ;
d_show_len = 0 ;
}
if ( dtree [ node ] . branch [ 1 ] ! = VACANT )
2022-07-24 13:10:45 +03:00
recursively_show_g ( dtree [ node ] . branch [ 1 ] , level ) ;
2019-02-05 02:44:07 +02:00
}
/* Print one row (26 entries) of the alphabet table on a single line.
   i selects the row of alphabet[][] and the matching 26-entry slice of
   alphabet_used[].  An entry whose alphabet_used[] marker matches is
   wrapped in parentheses; any other entry is padded instead.           */
static void show_alphabet(int i)
{
    char chartext[8];
    int col;

    for (col = 0; col < 26; col++)
    {
        int slot = 26 * i + col;
        int zscii = alphabet[i][col];

        if (alphabet_used[slot] == ' N ') {
            printf(" ( ");
        }
        else {
            printf(" ");
        }

        /* Convert the ZSCII value to printable text before showing it. */
        zscii_to_text(chartext, zscii);
        printf(" %s ", chartext);

        if (alphabet_used[slot] == ' N ') {
            printf(" ) ");
        }
        else {
            printf(" ");
        }
    }

    printf(" \n ");
}
2022-07-24 13:10:45 +03:00
extern void show_dictionary ( int level )
{
/* Level 0: show words only. Level 1: show words and flags.
Level 2 : also show bytes . */
printf ( " Dictionary contains %d entries: \n " , dict_entries ) ;
2019-02-05 02:44:07 +02:00
if ( dict_entries ! = 0 )
2022-03-03 02:10:25 +02:00
{ d_show_len = 0 ; d_show_buf = NULL ;
2019-02-05 02:44:07 +02:00
if ( ! glulx_mode )
2022-07-24 13:10:45 +03:00
recursively_show_z ( root , level ) ;
2019-02-05 02:44:07 +02:00
else
2022-07-24 13:10:45 +03:00
recursively_show_g ( root , level ) ;
2019-02-05 02:44:07 +02:00
}
2022-03-03 02:10:25 +02:00
if ( ! glulx_mode )
{
printf ( " \n Z-machine alphabet entries: \n " ) ;
show_alphabet ( 0 ) ;
show_alphabet ( 1 ) ;
show_alphabet ( 2 ) ;
}
2019-02-05 02:44:07 +02:00
}
extern void write_dictionary_to_transcript ( void )
2022-03-03 02:10:25 +02:00
{
d_show_size = 80 ; /* initial size */
d_show_buf = my_malloc ( d_show_size , " dictionary display buffer " ) ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
write_to_transcript_file ( " " , STRCTX_INFO ) ;
sprintf ( d_show_buf , " [Dictionary contains %d entries:] " , dict_entries ) ;
write_to_transcript_file ( d_show_buf , STRCTX_INFO ) ;
d_show_len = 0 ;
2019-02-05 02:44:07 +02:00
if ( dict_entries ! = 0 )
2022-03-03 02:10:25 +02:00
{
2019-02-05 02:44:07 +02:00
if ( ! glulx_mode )
2022-07-24 13:10:45 +03:00
recursively_show_z ( root , 0 ) ;
2019-02-05 02:44:07 +02:00
else
2022-07-24 13:10:45 +03:00
recursively_show_g ( root , 0 ) ;
2019-02-05 02:44:07 +02:00
}
2022-03-03 02:10:25 +02:00
if ( d_show_len ! = 0 ) write_to_transcript_file ( d_show_buf , STRCTX_DICT ) ;
my_free ( & d_show_buf , " dictionary display buffer " ) ;
d_show_len = 0 ; d_show_buf = NULL ;
2019-02-05 02:44:07 +02:00
}
/* ========================================================================= */
/* Data structure management routines */
/* ------------------------------------------------------------------------- */
extern void init_text_vars ( void )
{ int j ;
2022-03-03 02:10:25 +02:00
opttext = NULL ;
opttextlen = 0 ;
2019-02-05 02:44:07 +02:00
bestyet = NULL ;
bestyet2 = NULL ;
tlbtab = NULL ;
grandtable = NULL ;
grandflags = NULL ;
2022-03-03 02:10:25 +02:00
2024-02-12 01:08:33 +02:00
translated_text = NULL ;
temp_symbol = NULL ;
2022-03-03 02:10:25 +02:00
all_text = NULL ;
2019-02-05 02:44:07 +02:00
for ( j = 0 ; j < 256 ; j + + ) abbrevs_lookup [ j ] = - 1 ;
total_zchars_trans = 0 ;
2022-03-03 02:10:25 +02:00
dictionary = NULL ;
dictionary_top = 0 ;
2019-02-05 02:44:07 +02:00
dtree = NULL ;
final_dict_order = NULL ;
dict_sort_codes = NULL ;
2024-02-12 01:08:33 +02:00
prepared_sort = NULL ;
2019-02-05 02:44:07 +02:00
dict_entries = 0 ;
2022-03-03 02:10:25 +02:00
static_strings_area = NULL ;
abbreviations_optimal_parse_schedule = NULL ;
abbreviations_optimal_parse_scores = NULL ;
compressed_offsets = NULL ;
huff_entities = NULL ;
hufflist = NULL ;
unicode_usage_entries = NULL ;
2019-02-05 02:44:07 +02:00
}
extern void text_begin_pass ( void )
{ abbrevs_lookup_table_made = FALSE ;
no_abbreviations = 0 ;
2024-02-12 01:08:33 +02:00
abbreviations_totaltext = 0 ;
2019-02-05 02:44:07 +02:00
total_chars_trans = 0 ; total_bytes_trans = 0 ;
2022-03-03 02:10:25 +02:00
all_text_top = 0 ;
2019-02-05 02:44:07 +02:00
dictionary_begin_pass ( ) ;
2022-03-03 02:10:25 +02:00
low_strings_top = 0 ;
2019-02-05 02:44:07 +02:00
static_strings_extent = 0 ;
no_strings = 0 ;
no_dynamic_strings = 0 ;
no_unicode_chars = 0 ;
}
/* Note: for allocation and deallocation of all_the_text, see inform.c */
extern void text_allocate_arrays ( void )
2022-03-03 02:10:25 +02:00
{
int ix ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
initialise_memory_list ( & translated_text_memlist ,
sizeof ( uchar ) , 8000 , ( void * * ) & translated_text ,
" translated text holding area " ) ;
2024-02-12 01:08:33 +02:00
initialise_memory_list ( & temp_symbol_memlist ,
sizeof ( char ) , 32 , ( void * * ) & temp_symbol ,
" temporary symbol name " ) ;
2022-03-03 02:10:25 +02:00
initialise_memory_list ( & all_text_memlist ,
sizeof ( char ) , 0 , ( void * * ) & all_text ,
" transcription text for optimise " ) ;
initialise_memory_list ( & static_strings_area_memlist ,
sizeof ( uchar ) , 128 , ( void * * ) & static_strings_area ,
" static strings area " ) ;
2024-02-12 01:08:33 +02:00
initialise_memory_list ( & abbreviations_text_memlist ,
sizeof ( char ) , 64 , ( void * * ) & abbreviations_text ,
2022-03-03 02:10:25 +02:00
" abbreviation text " ) ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
initialise_memory_list ( & abbreviations_memlist ,
sizeof ( abbreviation ) , 64 , ( void * * ) & abbreviations ,
" abbreviations " ) ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
initialise_memory_list ( & abbreviations_optimal_parse_schedule_memlist ,
sizeof ( int ) , 0 , ( void * * ) & abbreviations_optimal_parse_schedule ,
" abbreviations optimal parse schedule " ) ;
initialise_memory_list ( & abbreviations_optimal_parse_scores_memlist ,
sizeof ( int ) , 0 , ( void * * ) & abbreviations_optimal_parse_scores ,
" abbreviations optimal parse scores " ) ;
initialise_memory_list ( & dtree_memlist ,
sizeof ( dict_tree_node ) , 1500 , ( void * * ) & dtree ,
" red-black tree for dictionary " ) ;
initialise_memory_list ( & dict_sort_codes_memlist ,
sizeof ( uchar ) , 1500 * DICT_WORD_BYTES , ( void * * ) & dict_sort_codes ,
" dictionary sort codes " ) ;
2024-02-12 01:08:33 +02:00
initialise_memory_list ( & prepared_sort_memlist ,
sizeof ( uchar ) , DICT_WORD_BYTES , ( void * * ) & prepared_sort ,
" prepared sort buffer " ) ;
2022-03-03 02:10:25 +02:00
final_dict_order = NULL ; /* will be allocated at sort_dictionary() time */
/* The exact size will be 7+7*num for z3, 7+9*num for z4+,
4 + DICT_ENTRY_BYTE_LENGTH * num for Glulx . But this is just an initial
allocation ; we don ' t have to be precise . */
initialise_memory_list ( & dictionary_memlist ,
sizeof ( uchar ) , 1000 * DICT_ENTRY_BYTE_LENGTH , ( void * * ) & dictionary ,
" dictionary " ) ;
initialise_memory_list ( & low_strings_memlist ,
sizeof ( uchar ) , 1024 , ( void * * ) & low_strings ,
" low (abbreviation) strings " ) ;
d_show_buf = NULL ;
d_show_size = 0 ;
d_show_len = 0 ;
2019-02-05 02:44:07 +02:00
huff_entities = NULL ;
hufflist = NULL ;
unicode_usage_entries = NULL ;
done_compression = FALSE ;
compression_table_size = 0 ;
compressed_offsets = NULL ;
2022-03-03 02:10:25 +02:00
initialise_memory_list ( & unicode_usage_entries_memlist ,
sizeof ( unicode_usage_t ) , 0 , ( void * * ) & unicode_usage_entries ,
" unicode entity entries " ) ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
/* hufflist and huff_entities will be allocated at compress_game_text() time. */
/* This hash table is only used in Glulx */
for ( ix = 0 ; ix < UNICODE_HASH_BUCKETS ; ix + + )
unicode_usage_hash [ ix ] = - 1 ;
initialise_memory_list ( & compressed_offsets_memlist ,
sizeof ( int32 ) , 0 , ( void * * ) & compressed_offsets ,
2019-02-05 02:44:07 +02:00
" static strings index table " ) ;
2022-03-03 02:10:25 +02:00
}
/* Hand the recorded text transcript over to the abbreviations optimiser.

   optimise_abbreviations() is called after free_arrays(), so the
   transcript must be kept somewhere that will not be freed.  This is
   done by copying the pointer (and its length) into opttext/opttextlen.

   Takes no arguments; the parameter list is spelled (void) so that this
   definition is a proper prototype, like the other functions here.      */
extern void extract_all_text(void)
{
    opttext    = all_text;
    opttextlen = all_text_top;

    /* Re-initialise all_text_memlist so that it forgets the old pointer.
       Deallocating it in text_free_arrays() will then be a no-op.       */
    initialise_memory_list(&all_text_memlist,
        sizeof(char), 0, (void **)&all_text,
        " dummy transcription text ");
}
extern void text_free_arrays ( void )
{
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & translated_text_memlist ) ;
2024-02-12 01:08:33 +02:00
deallocate_memory_list ( & temp_symbol_memlist ) ;
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & all_text_memlist ) ;
deallocate_memory_list ( & low_strings_memlist ) ;
2024-02-12 01:08:33 +02:00
deallocate_memory_list ( & abbreviations_text_memlist ) ;
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & abbreviations_memlist ) ;
deallocate_memory_list ( & abbreviations_optimal_parse_schedule_memlist ) ;
deallocate_memory_list ( & abbreviations_optimal_parse_scores_memlist ) ;
deallocate_memory_list ( & dtree_memlist ) ;
deallocate_memory_list ( & dict_sort_codes_memlist ) ;
2024-02-12 01:08:33 +02:00
deallocate_memory_list ( & prepared_sort_memlist ) ;
2019-02-05 02:44:07 +02:00
my_free ( & final_dict_order , " final dictionary ordering table " ) ;
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & dictionary_memlist ) ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & compressed_offsets_memlist ) ;
2019-02-05 02:44:07 +02:00
my_free ( & hufflist , " huffman node list " ) ;
my_free ( & huff_entities , " huffman entities " ) ;
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & unicode_usage_entries_memlist ) ;
2019-02-05 02:44:07 +02:00
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & static_strings_area_memlist ) ;
2019-02-05 02:44:07 +02:00
}
extern void ao_free_arrays ( void )
2022-03-03 02:10:25 +02:00
{
/* Called only after optimise_abbreviations() runs. */
2024-02-12 01:08:33 +02:00
int32 i ;
if ( bestyet ) {
for ( i = 0 ; i < MAX_BESTYET ; i + + ) {
my_free ( & bestyet [ i ] . text , " bestyet.text " ) ;
}
}
if ( bestyet2 ) {
for ( i = 0 ; i < MAX_ABBREVS ; i + + ) {
my_free ( & bestyet2 [ i ] . text , " bestyet2.text " ) ;
}
}
2022-03-03 02:10:25 +02:00
my_free ( & opttext , " stashed transcript for optimisation " ) ;
2019-02-05 02:44:07 +02:00
my_free ( & bestyet , " bestyet " ) ;
my_free ( & bestyet2 , " bestyet2 " ) ;
my_free ( & grandtable , " grandtable " ) ;
my_free ( & grandflags , " grandflags " ) ;
2022-03-03 02:10:25 +02:00
deallocate_memory_list ( & tlbtab_memlist ) ;
/* This was re-inited, so we should re-deallocate it. */
deallocate_memory_list ( & all_text_memlist ) ;
2019-02-05 02:44:07 +02:00
}
/* ========================================================================= */