inform7/inform6/Inform6/syntax.c

/* ------------------------------------------------------------------------- */
/*   "syntax" : Syntax analyser and compiler                                 */
/*                                                                           */
/*   Part of Inform 6.41                                                     */
/*   copyright (c) Graham Nelson 1993 - 2022                                 */
/*                                                                           */
/* ------------------------------------------------------------------------- */

#include "header.h"

static char *lexical_source;

int no_syntax_lines;                                  /*  Syntax line count  */

static void begin_syntax_line(int statement_mode)
{   no_syntax_lines++;
    next_token_begins_syntax_line = TRUE;

    clear_expression_space();
    if (statement_mode)
    {   statements.enabled = TRUE;
        conditions.enabled = TRUE;
        local_variables.enabled = TRUE;
        system_functions.enabled = TRUE;

        misc_keywords.enabled = FALSE;
        directive_keywords.enabled = FALSE;
        directives.enabled = FALSE;
        segment_markers.enabled = FALSE;
        opcode_names.enabled = FALSE;
    }
    else
    {   directives.enabled = TRUE;
        segment_markers.enabled = TRUE;

        statements.enabled = FALSE;
        misc_keywords.enabled = FALSE;
        directive_keywords.enabled = FALSE;
        local_variables.enabled = FALSE;
        system_functions.enabled = FALSE;
        conditions.enabled = FALSE;
        opcode_names.enabled = FALSE;
    }

    sequence_point_follows = TRUE;

    if (debugfile_switch)
    {   get_next_token();
        statement_debug_location = get_token_location();
        put_token_back();
    }
}

extern void panic_mode_error_recovery(void)
{
    /* Consume tokens until the next semicolon (or end of file).
       This is typically called after a syntax error, in hopes of
       getting parsing back on track. */

    while ((token_type != EOF_TT)
           && ((token_type != SEP_TT)||(token_value != SEMICOLON_SEP)))

        get_next_token();
}

extern void get_next_token_with_directives(void)
{
    /* A higher-level version of get_next_token(), which detects and
       obeys directives such as #ifdef/#ifnot/#endif. (The # sign is
       required in this case.)

       This is called while parsing a long construct, such as Class or
       Object, where we want to support internal #ifdefs. (Although
       function-parsing predates this and doesn't make use of it.) */

    int directives_save, segment_markers_save, statements_save;

    while (TRUE)
    {
        get_next_token();

        /* If the first token is not a '#', return it directly. */
        if ((token_type != SEP_TT) || (token_value != HASH_SEP))
            return;

        /* Save the lexer flags, and set up for directive parsing. */
        directives_save = directives.enabled;
        segment_markers_save = segment_markers.enabled;
        statements_save = statements.enabled;

        directives.enabled = TRUE;
        segment_markers.enabled = FALSE;
        statements.enabled = FALSE;
        conditions.enabled = FALSE;
        local_variables.enabled = FALSE;
        misc_keywords.enabled = FALSE;
        system_functions.enabled = FALSE;

        get_next_token();

        if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
        {   error("It is illegal to nest a routine inside an object using '#['");
            return;
        }

        if (token_type == DIRECTIVE_TT)
            parse_given_directive(TRUE);
        else
        {   ebf_error("directive", token_text);
            return;
        }

        /* Restore all the lexer flags. (We are squashing several of them
           into a single save variable, which I think is safe because that's
           what CKnight did.)
        */
        directive_keywords.enabled = FALSE;
        directives.enabled = directives_save;
        segment_markers.enabled = segment_markers_save;
        statements.enabled =
            conditions.enabled =
            local_variables.enabled =
            misc_keywords.enabled =
            system_functions.enabled = statements_save;
    }
}

extern void parse_program(char *source)
{
    lexical_source = source;
    while (parse_directive(FALSE)) ;
}

extern int parse_directive(int internal_flag)
{
    /*  Internal_flag is FALSE if the directive is encountered normally
        (at the top level of the program); TRUE if encountered with
        a # prefix inside a routine or object definition.

        (Only directives like #ifdef are permitted inside a definition.)

        Returns: TRUE if program continues, FALSE if end of file reached.    */

    int routine_symbol, rep_symbol;
    int is_renamed;

    begin_syntax_line(FALSE);
    if (!internal_flag) {
        /* An internal directive can occur in the middle of an expression or
           object definition. So we only release for top-level directives.   */
        release_token_texts();
    }
    get_next_token();

    if (token_type == EOF_TT) return(FALSE);

    if ((token_type == SEP_TT) && (token_value == HASH_SEP))
        get_next_token();

    if ((token_type == SEP_TT) && (token_value == OPEN_SQUARE_SEP))
    {   if (internal_flag)
        {   error("It is illegal to nest routines using '#['");
            return(TRUE);
        }

        directives.enabled = FALSE;
        directive_keywords.enabled = FALSE;
        segment_markers.enabled = FALSE;

        /* The upcoming symbol is a definition; don't count it as a
           top-level reference *to* the function. */
        df_dont_note_global_symbols = TRUE;
        get_next_token();
        df_dont_note_global_symbols = FALSE;
        if (token_type != SYMBOL_TT)
        {   ebf_error("routine name", token_text);
            return(FALSE);
        }
        if ((!(symbols[token_value].flags & UNKNOWN_SFLAG))
            && (!(symbols[token_value].flags & REPLACE_SFLAG)))
        {   ebf_symbol_error("routine name", token_text, typename(symbols[token_value].type), symbols[token_value].line);
            return(FALSE);
        }

        routine_symbol = token_value;

        rep_symbol = routine_symbol;
        is_renamed = find_symbol_replacement(&rep_symbol);

        if ((symbols[routine_symbol].flags & REPLACE_SFLAG)
            && !is_renamed && (is_systemfile()))
        {   /* The function is definitely being replaced (system_file
               always loses priority in a replacement) but is not
               being renamed to something else. Skip its definition
               entirely. */
            dont_enter_into_symbol_table = TRUE;
            do
            {   get_next_token();
            } while (!((token_type == EOF_TT)
                     || ((token_type==SEP_TT)
                         && (token_value==CLOSE_SQUARE_SEP))));
            dont_enter_into_symbol_table = FALSE;
            if (token_type == EOF_TT) return FALSE;
        }
        else
        {   /* Parse the function definition and assign its symbol. */
            assign_symbol(routine_symbol,
                parse_routine(lexical_source, FALSE,
                    symbols[routine_symbol].name, FALSE, routine_symbol),
                ROUTINE_T);
            symbols[routine_symbol].line = routine_starts_line;
        }

        if (is_renamed) {
            /* This function was subject to a "Replace X Y" directive.
               The first time we see a definition for symbol X, we
               copy it to Y -- that's the "original" form of the
               function. */
            if (symbols[rep_symbol].value == 0) {
                assign_symbol(rep_symbol, symbols[routine_symbol].value, ROUTINE_T);
            }
        }

        get_next_token();
        if ((token_type != SEP_TT) || (token_value != SEMICOLON_SEP))
        {   ebf_error("';' after ']'", token_text);
            put_token_back();
        }
        return TRUE;
    }

    if ((token_type == SYMBOL_TT) && (symbols[token_value].type == CLASS_T))
    {   if (internal_flag)
        {   error("It is illegal to nest an object in a routine using '#classname'");
            return(TRUE);
        }
        symbols[token_value].flags |= USED_SFLAG;
        make_object(FALSE, NULL, -1, -1, symbols[token_value].value);
        return TRUE;
    }

    if (token_type != DIRECTIVE_TT)
    {   /* If we're internal, we expect only a directive here. If
           we're top-level, the possibilities are broader. */
        if (internal_flag)
            ebf_error("directive", token_text);
        else
            ebf_error("directive, '[' or class name", token_text);
        panic_mode_error_recovery();
        return TRUE;
    }

    return !(parse_given_directive(internal_flag));
}

/* Check what's coming up after a switch case value. */
static int switch_sign(void)
{
    if ((token_type == SEP_TT)&&(token_value == COLON_SEP))   return 1;
    if ((token_type == SEP_TT)&&(token_value == COMMA_SEP))   return 2;
    if ((token_type==MISC_KEYWORD_TT)&&(token_value==TO_MK))  return 3;
    return 0;
}

/* Info for the current switch statement. Both arrays indexed by spec_sp */
#define MAX_SPEC_STACK (32)
static assembly_operand spec_stack[MAX_SPEC_STACK];
static int spec_type[MAX_SPEC_STACK];

static void compile_alternatives_z(assembly_operand switch_value, int n,
    int stack_level, int label, int flag)
{   switch(n)
    {   case 1:
            assemblez_2_branch(je_zc, switch_value,
                spec_stack[stack_level],
                label, flag); return;
        case 2:
            assemblez_3_branch(je_zc, switch_value,
                spec_stack[stack_level], spec_stack[stack_level+1],
                label, flag); return;
        case 3:
            assemblez_4_branch(je_zc, switch_value,
                spec_stack[stack_level], spec_stack[stack_level+1],
                spec_stack[stack_level+2],
                label, flag); return;
    }
}

static void compile_alternatives_g(assembly_operand switch_value, int n,
    int stack_level, int label, int flag)
{
    int the_zc = (flag) ? jeq_gc : jne_gc;

    if (n == 1) {
      assembleg_2_branch(the_zc, switch_value,
        spec_stack[stack_level],
        label);
    }
    else {
      error("*** Cannot generate multi-equality tests in Glulx ***");
    }
}

static void compile_alternatives(assembly_operand switch_value, int n,
    int stack_level, int label, int flag)
{
  if (!glulx_mode)
    compile_alternatives_z(switch_value, n, stack_level, label, flag);
  else
    compile_alternatives_g(switch_value, n, stack_level, label, flag);
}

static void parse_switch_spec(assembly_operand switch_value, int label,
    int action_switch)
{
    int i, j, label_after = -1, spec_sp = 0;
    int max_equality_args = ((!glulx_mode) ? 3 : 1);

    sequence_point_follows = FALSE;

    do
    {   if (spec_sp >= MAX_SPEC_STACK)
        {   error("At most 32 values can be given in a single 'switch' case");
            panic_mode_error_recovery();
            return;
        }

        if (action_switch)
        {   get_next_token();
            if (token_type == SQ_TT || token_type == DQ_TT) {
                ebf_error("action (or fake action) name", token_text);
                continue;
            }
            spec_stack[spec_sp] = action_of_name(token_text);

            if (spec_stack[spec_sp].value == -1)
            {   spec_stack[spec_sp].value = 0;
                ebf_error("action (or fake action) name", token_text);
            }
        }
        else
            spec_stack[spec_sp] =
      code_generate(parse_expression(CONSTANT_CONTEXT), CONSTANT_CONTEXT, -1);

        misc_keywords.enabled = TRUE;
        get_next_token();
        misc_keywords.enabled = FALSE;

        spec_type[spec_sp++] = switch_sign();
        switch(spec_type[spec_sp-1])
        {   case 0:
                if (action_switch)
                    ebf_error("',' or ':'", token_text);
                else ebf_error("',', ':' or 'to'", token_text);
                panic_mode_error_recovery();
                return;
            case 1: goto GenSpecCode;
            case 3: if (label_after == -1) label_after = next_label++;
        }
     } while(TRUE);

     GenSpecCode:

     if ((spec_sp > max_equality_args) && (label_after == -1))
         label_after = next_label++;

     if (label_after == -1)
     {   compile_alternatives(switch_value, spec_sp, 0, label, FALSE); return;
     }

     for (i=0; i<spec_sp;)
     {
         j=i; while ((j<spec_sp) && (spec_type[j] != 3)) j++;

         if (j > i)
         {   if (j-i > max_equality_args) j=i+max_equality_args;

             if (j == spec_sp)
                 compile_alternatives(switch_value, j-i, i, label, FALSE);
             else
                 compile_alternatives(switch_value, j-i, i, label_after, TRUE);

             i=j;
         }
         else
         {
           if (!glulx_mode) {
             if (i == spec_sp - 2)
             {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
                     label, TRUE);
                 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
                     label, TRUE);
             }
             else
             {   assemblez_2_branch(jl_zc, switch_value, spec_stack[i],
                     next_label, TRUE);
                 assemblez_2_branch(jg_zc, switch_value, spec_stack[i+1],
                     label_after, FALSE);
                 assemble_label_no(next_label++);
             }
           }
           else {
             if (i == spec_sp - 2)
             {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
                     label);
                 assembleg_2_branch(jgt_gc, switch_value, spec_stack[i+1],
                     label);
             }
             else
             {   assembleg_2_branch(jlt_gc, switch_value, spec_stack[i],
                     next_label);
                 assembleg_2_branch(jle_gc, switch_value, spec_stack[i+1],
                     label_after);
                 assemble_label_no(next_label++);
             }
           }
           i = i+2;
         }
     }

     assemble_label_no(label_after);
}

extern int32 parse_routine(char *source, int embedded_flag, char *name,
    int veneer_flag, int r_symbol)
{   int32 packed_address; int i; int debug_flag = FALSE;
    int switch_clause_made = FALSE, default_clause_made = FALSE,
        switch_label = 0;
    debug_location_beginning beginning_debug_location =
        get_token_location_beginning();

    /*  (switch_label needs no initialisation here, but it prevents some
        compilers from issuing warnings)   */

    if ((source != lexical_source) || (veneer_flag))
    {   lexical_source = source;
        restart_lexer(lexical_source, name);
    }

    no_locals = 0;

    for (i=0;i<MAX_LOCAL_VARIABLES-1;i++)
        local_variable_names[i].text[0] = 0;

    do
    {   statements.enabled = TRUE;
        dont_enter_into_symbol_table = TRUE;
        get_next_token();
        dont_enter_into_symbol_table = FALSE;
        if ((token_type == SEP_TT) && (token_value == TIMES_SEP)
            && (no_locals == 0) && (!debug_flag))
        {   debug_flag = TRUE; continue;
        }

        if (token_type != DQ_TT)
        {   if ((token_type == SEP_TT)
                && (token_value == SEMICOLON_SEP)) break;
            ebf_error("local variable name or ';'", token_text);
            panic_mode_error_recovery();
            break;
        }

        if (strlen(token_text) > MAX_IDENTIFIER_LENGTH)
        {   error_named("Local variable identifier too long:", token_text);
            panic_mode_error_recovery();
            break;
        }

        if (no_locals == MAX_LOCAL_VARIABLES-1)
        {   error_numbered("Too many local variables for a routine; max is",
                MAX_LOCAL_VARIABLES-1);
            panic_mode_error_recovery();
            break;
        }

        for (i=0;i<no_locals;i++) {
            if (strcmpcis(token_text, local_variable_names[i].text)==0)
                error_named("Local variable defined twice:", token_text);
        }
        strcpy(local_variable_names[no_locals++].text, token_text);
    } while(TRUE);

    /* Set up the local variable hash and the local_variables.keywords
       table. */
    construct_local_variable_tables();

    if ((trace_fns_setting==3)
        || ((trace_fns_setting==2) && (veneer_mode==FALSE))
        || ((trace_fns_setting==1) && (is_systemfile()==FALSE)))
        debug_flag = TRUE;
    if ((embedded_flag == FALSE) && (veneer_mode == FALSE) && debug_flag)
        symbols[r_symbol].flags |= STAR_SFLAG;

    packed_address = assemble_routine_header(no_locals, debug_flag,
        name, embedded_flag, r_symbol);

    do
    {   begin_syntax_line(TRUE);
        release_token_texts();
        get_next_token();

        if (token_type == EOF_TT)
        {   ebf_error("']'", token_text);
            assemble_routine_end
                (embedded_flag,
                 get_token_location_end(beginning_debug_location));
            put_token_back();
            break;
        }

        if ((token_type == SEP_TT)
            && (token_value == CLOSE_SQUARE_SEP))
        {   if (switch_clause_made && (!default_clause_made))
                assemble_label_no(switch_label);
            directives.enabled = TRUE;
            sequence_point_follows = TRUE;
            get_next_token();
            assemble_routine_end
                (embedded_flag,
                 get_token_location_end(beginning_debug_location));
            put_token_back();
            break;
        }

        if ((token_type == STATEMENT_TT) && (token_value == SDEFAULT_CODE))
        {   if (default_clause_made)
                error("Multiple 'default' clauses defined in same 'switch'");
            default_clause_made = TRUE;

            if (switch_clause_made)
            {   if (!execution_never_reaches_here)
                {   sequence_point_follows = FALSE;
                    if (!glulx_mode)
                        assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
                    else
                        assembleg_1(return_gc,
                            ((embedded_flag)?zero_operand:one_operand));
                }
                assemble_label_no(switch_label);
            }
            switch_clause_made = TRUE;

            get_next_token();
            if ((token_type == SEP_TT) &&
                (token_value == COLON_SEP)) continue;
            ebf_error("':' after 'default'", token_text);
            panic_mode_error_recovery();
            continue;
        }

        /*  Only check for the form of a case switch if the initial token
            isn't double-quoted text, as that would mean it was a print_ret
            statement: this is a mild ambiguity in the grammar.
            Action statements also cannot be cases. */

        if ((token_type != DQ_TT) && (token_type != SEP_TT))
        {   get_next_token();
            if (switch_sign() > 0)
            {   assembly_operand AO;
                if (default_clause_made)
                    error("'default' must be the last 'switch' case");

                if (switch_clause_made)
                {   if (!execution_never_reaches_here)
                    {   sequence_point_follows = FALSE;
                        if (!glulx_mode)
                            assemblez_0((embedded_flag)?rfalse_zc:rtrue_zc);
                        else
                            assembleg_1(return_gc,
                                ((embedded_flag)?zero_operand:one_operand));
                    }
                    assemble_label_no(switch_label);
                }

                switch_label = next_label++;
                switch_clause_made = TRUE;
                put_token_back(); put_token_back();

                if (!glulx_mode) {
                    INITAOTV(&AO, VARIABLE_OT, 249);
                }
                else {
                    INITAOTV(&AO, GLOBALVAR_OT, MAX_LOCAL_VARIABLES+6); /* sw__var */
                }
                parse_switch_spec(AO, switch_label, TRUE);

                continue;
            }
            else
            {   put_token_back(); put_token_back(); get_next_token();
                sequence_point_follows = TRUE;
            }
        }

        parse_statement(-1, -1);

    } while (TRUE);

    return packed_address;
}

/* Parse one block of code (a statement or brace-delimited stanza).
   This is used by the IF, DO, FOR, OBJECTLOOP, SWITCH, and WHILE
   statements.
   (Note that this is *not* called by the top-level parse_routine()
   handler.)
   The break_label and continue_label arguments are the labels in
   the calling block to jump to on "break" or "continue". -1 means
   we can't "break"/"continue" here (because we're not in a loop/switch).
   If switch_rule is true, we're in a switch block; case labels are
   accepted.
*/
extern void parse_code_block(int break_label, int continue_label,
    int switch_rule)
{   int switch_clause_made = FALSE, default_clause_made = FALSE, switch_label = 0;
    int unary_minus_flag, saved_entire_flag;

    saved_entire_flag = (execution_never_reaches_here & EXECSTATE_ENTIRE);
    if (execution_never_reaches_here)
        execution_never_reaches_here |= EXECSTATE_ENTIRE;

    begin_syntax_line(TRUE);
    release_token_texts();
    get_next_token();

    if (token_type == SEP_TT && token_value == OPEN_BRACE_SEP)
    {
        /* Parse a braced stanza of statements. */
        do
        {   begin_syntax_line(TRUE);
            release_token_texts();
            get_next_token();

            if ((token_type == SEP_TT) && (token_value == HASH_SEP))
            {   parse_directive(TRUE);
                continue;
            }
            if (token_type == SEP_TT && token_value == CLOSE_BRACE_SEP)
            {   if (switch_clause_made && (!default_clause_made))
                    assemble_label_no(switch_label);
                break;
            }
            if (token_type == EOF_TT)
            {   ebf_error("'}'", token_text);
                break;
            }

            if (switch_rule != 0)
            {
                /*  Within a 'switch' block  */

                if ((token_type==STATEMENT_TT)&&(token_value==SDEFAULT_CODE))
                {   if (default_clause_made)
                error("Multiple 'default' clauses defined in same 'switch'");
                    default_clause_made = TRUE;

                    if (switch_clause_made)
                    {   if (!execution_never_reaches_here)
                        {   sequence_point_follows = FALSE;
                            assemble_jump(break_label);
                        }
                        assemble_label_no(switch_label);
                    }
                    switch_clause_made = TRUE;

                    get_next_token();
                    if ((token_type == SEP_TT) &&
                        (token_value == COLON_SEP)) continue;
                    ebf_error("':' after 'default'", token_text);
                    panic_mode_error_recovery();
                    continue;
                }

                /*  Decide: is this an ordinary statement, or the start
                    of a new case?  */

                if (token_type == DQ_TT) goto NotASwitchCase;

                unary_minus_flag
                    = ((token_type == SEP_TT)&&(token_value == MINUS_SEP));
                if (unary_minus_flag) get_next_token();

                /*  Now read the token _after_ any possible constant:
                    if that's a 'to', ',' or ':' then we have a case  */

                misc_keywords.enabled = TRUE;
                get_next_token();
                misc_keywords.enabled = FALSE;

                if (switch_sign() > 0)
                {   assembly_operand AO;

                    if (default_clause_made)
                        error("'default' must be the last 'switch' case");

                    if (switch_clause_made)
                    {   if (!execution_never_reaches_here)
                        {   sequence_point_follows = FALSE;
                            assemble_jump(break_label);
                        }
                        assemble_label_no(switch_label);
                    }

                    switch_label = next_label++;
                    switch_clause_made = TRUE;
                    put_token_back(); put_token_back();
                    if (unary_minus_flag) put_token_back();

                    AO = temp_var1;
                    parse_switch_spec(AO, switch_label, FALSE);
                    continue;
                }
                else
                {   put_token_back(); put_token_back();
                    if (unary_minus_flag) put_token_back();
                    get_next_token();
                }
            }

            if ((switch_rule != 0) && (!switch_clause_made))
                ebf_error("switch value", token_text);

            NotASwitchCase:
            sequence_point_follows = TRUE;
            parse_statement(break_label, continue_label);
        }
        while(TRUE);
    }
    else {
        if (switch_rule != 0)
            ebf_error("braced code block after 'switch'", token_text);

        /* Parse a single statement. */
        parse_statement(break_label, continue_label);
    }

    if (saved_entire_flag)
        execution_never_reaches_here |= EXECSTATE_ENTIRE;
    else
        execution_never_reaches_here &= ~EXECSTATE_ENTIRE;
}

/* ========================================================================= */
/*   Data structure management routines                                      */
/* ------------------------------------------------------------------------- */

extern void init_syntax_vars(void)
{
}

extern void syntax_begin_pass(void)
{   no_syntax_lines = 0;
}

extern void syntax_allocate_arrays(void)
{
}

extern void syntax_free_arrays(void)
{
}

/* ========================================================================= */