To generate final code from intermediate code.


§1. Pipeline stage. This whole module exists to provide a single pipeline stage, which performs the final generation of code from a tree of fully-linked and generally complete Inter.

It comes in two forms (the optional one writes nothing if no filename is supplied to it; the compulsory one throws an error in that case), but note that both call the same function.

/* Registers the two forms of the generation stage with the pipeline system:
   "generate", taking a TEXT_OUT_STAGE_ARG argument, and "optionally-generate",
   taking an OPTIONAL_TEXT_OUT_STAGE_ARG argument. Both forms are carried out
   by the same function, CodeGen::run_pipeline_stage. */
void CodeGen::create_pipeline_stage(void) {
    ParsingPipelines::new_stage(I"generate", CodeGen::run_pipeline_stage,
        TEXT_OUT_STAGE_ARG, FALSE);
    ParsingPipelines::new_stage(I"optionally-generate", CodeGen::run_pipeline_stage,
        OPTIONAL_TEXT_OUT_STAGE_ARG, FALSE);
}

/* Carries out the generation stage for one pipeline step. If the step has no
   generator argument, nothing is generated; otherwise a new generation is
   built from the step's ephemeral data and handed to Generators::go.
   Returns TRUE in either case. */
int CodeGen::run_pipeline_stage(pipeline_step *step) {
    if (step->generator_argument == NULL) return TRUE; /* nothing to generate with */

    code_generation *generation = CodeGen::new_generation(
        step,
        step->ephemera.parsed_filename,
        step->ephemera.to_stream,
        step->ephemera.tree,
        step->ephemera.package_argument,
        step->generator_argument,
        step->ephemera.for_VM,
        FALSE);
    Generators::go(generation);
    return TRUE;
}

§2. A "generation" is a single act of translating inter code into final code. It will be carried out by the "generator", using the data held in the following object.

/* The state of a single act of generation. Instances are created and fully
   initialised by CodeGen::new_generation: the linked lists all begin empty,
   the counts begin at 0, void_level begins at -1 and literal_text_mode at
   REGULAR_LTM. The lists of variables, properties, instances and kinds are
   filled in by CodeGen::gather_up when a full (non-temporary) generation
   is made. */
typedef struct code_generation {
    struct code_generator *generator;
    struct pipeline_step *from_step;
    struct target_vm *for_VM;
    void *generator_private_data;  depending on the target generated to

    struct filename *to_file;       filename of output, and/or...
    struct text_stream *to_stream;  stream for textual output

    struct inter_tree *from;
    struct inter_package *just_this_package;

    struct segmentation_data segmentation;
    int void_level;
    int literal_text_mode;
    struct linked_list *global_variables;
    struct linked_list *text_literals;
    struct linked_list *assimilated_properties;
    struct linked_list *unassimilated_properties;
    struct linked_list *instances;
    struct linked_list *kinds;
    struct linked_list *kinds_in_declaration_order;
    struct linked_list *instances_in_declaration_order;

    int true_action_count;
    int fake_action_count;
    struct linked_list *actions;  of text_stream
    int dword_count;
    struct linked_list *words;  of vanilla_dword
    struct dictionary *dword_dictionary;  of vanilla_dword
    int verb_count;
    struct linked_list *verbs;  of vanilla_dword
    struct linked_list *verb_grammar;  of text_stream

    CLASS_DEFINITION
} code_generation;

/* Creates and initialises a new generation object.
     step      - the pipeline step this arises from (CodeGen::val_to_text passes NULL)
     F, T      - the output filename and/or output stream
     I         - the Inter tree to generate from
     just      - the package to generate; if NULL, the tree's main package is used
     generator - the code generator to use
     VM        - the target VM; if NULL, it is looked up from the generator's name
     temp      - if FALSE, the tree is traversed at once to gather up variables,
                 properties, instances and kinds; if TRUE, that traversal is skipped
   At least one of VM and generator must be non-NULL, or an internal error is
   thrown. */
code_generation *CodeGen::new_generation(pipeline_step *step, filename *F,
    text_stream *T, inter_tree *I, inter_package *just, code_generator *generator,
    target_vm *VM, int temp) {
    code_generation *gen = CREATE(code_generation);
    gen->from_step = step;
    gen->to_file = F;
    gen->to_stream = T;
    /* with neither a VM nor a generator, the output format cannot be deduced */
    if ((VM == NULL) && (generator == NULL)) internal_error("no way to determine format");
    if (VM == NULL) VM = TargetVMs::find(generator->generator_name);
    gen->for_VM = VM;
    gen->from = I;
    gen->generator = generator;
    if (just) gen->just_this_package = just;
    else gen->just_this_package = LargeScale::main_package(I);
    gen->segmentation = CodeGen::new_segmentation_data();
    gen->void_level = -1;
    gen->literal_text_mode = REGULAR_LTM;
    gen->global_variables = NEW_LINKED_LIST(inter_symbol);
    gen->text_literals = NEW_LINKED_LIST(text_literal_holder);
    gen->assimilated_properties = NEW_LINKED_LIST(inter_symbol);
    gen->unassimilated_properties = NEW_LINKED_LIST(inter_symbol);
    gen->instances = NEW_LINKED_LIST(inter_symbol);
    gen->kinds = NEW_LINKED_LIST(inter_symbol);
    gen->kinds_in_declaration_order = NEW_LINKED_LIST(inter_symbol);
    gen->instances_in_declaration_order = NEW_LINKED_LIST(inter_symbol);
    gen->true_action_count = 0;
    gen->fake_action_count = 0;
    gen->actions = NEW_LINKED_LIST(text_stream);
    gen->dword_count = 0;
    gen->words = NEW_LINKED_LIST(vanilla_dword);
    gen->dword_dictionary = Dictionaries::new(1024, FALSE);
    gen->verb_count = 0;
    gen->verbs = NEW_LINKED_LIST(vanilla_dword);
    gen->verb_grammar = NEW_LINKED_LIST(text_stream);
    /* the paragraph invoked here refers to the local variable gen */
    if (temp == FALSE) Traverse for global bric-a-brac2.1;
    return gen;
}

§2.1. Traverse for global bric-a-brac2.1 =

    /* Walk the whole tree, letting CodeGen::gather_up fill the lists held in gen;
       then copy the gathered kinds and instances into their "declaration order"
       lists, sorted with CodeGen::in_declaration_md_order. */
    InterTree::traverse(gen->from, CodeGen::gather_up, gen, NULL, 0);
    CodeGen::sort_symbol_list(gen->kinds_in_declaration_order, gen->kinds,
        CodeGen::in_declaration_md_order);
    CodeGen::sort_symbol_list(gen->instances_in_declaration_order, gen->instances,
        CodeGen::in_declaration_md_order);

§3.

/* Visitor function for the tree traversal made when a generation is created.
   The state is the code_generation being set up. Each variable, property,
   instance or typename instruction met has its symbol appended to the
   corresponding list in that generation; properties are split according to
   whether or not they carry the ASSIMILATED_IANN annotation. Instructions of
   any other construct are ignored. */
void CodeGen::gather_up(inter_tree *I, inter_tree_node *P, void *state) {
    code_generation *gen = (code_generation *) state;
    switch (Inode::get_construct_ID(P)) {
        case VARIABLE_IST: {
            inter_symbol *variable_s = VariableInstruction::variable(P);
            ADD_TO_LINKED_LIST(variable_s, inter_symbol, gen->global_variables);
            break;
        }
        case PROPERTY_IST: {
            inter_symbol *property_s = PropertyInstruction::property(P);
            linked_list *destination =
                (SymbolAnnotation::get_b(property_s, ASSIMILATED_IANN))?
                    (gen->assimilated_properties):(gen->unassimilated_properties);
            ADD_TO_LINKED_LIST(property_s, inter_symbol, destination);
            break;
        }
        case INSTANCE_IST: {
            inter_symbol *instance_s = InstanceInstruction::instance(P);
            ADD_TO_LINKED_LIST(instance_s, inter_symbol, gen->instances);
            break;
        }
        case TYPENAME_IST: {
            inter_symbol *kind_s = TypenameInstruction::typename(P);
            ADD_TO_LINKED_LIST(kind_s, inter_symbol, gen->kinds);
            break;
        }
    }
}

/* Appends the symbols in list L to the list to_L, in the order determined by
   the qsort-style comparison function sorter. L itself is not altered. The
   symbols are copied into a temporary array, sorted there, and the array is
   freed again before returning. Does nothing if L is empty. */
void CodeGen::sort_symbol_list(linked_list *to_L, linked_list *L,
    int (*sorter)(const void *elem1, const void *elem2)) {
    int count = LinkedLists::len(L);
    if (count <= 0) return;

    inter_symbol **sorted = (inter_symbol **)
        (Memory::calloc(count, sizeof(inter_symbol *), CODE_GENERATION_MREASON));

    int filled = 0;
    inter_symbol *S;
    LOOP_OVER_LINKED_LIST(S, inter_symbol, L) {
        sorted[filled] = S;
        filled++;
    }

    qsort(sorted, (size_t) count, sizeof(inter_symbol *), sorter);

    int pos = 0;
    while (pos < count) {
        ADD_TO_LINKED_LIST(sorted[pos], inter_symbol, to_L);
        pos++;
    }

    Memory::I7_array_free(sorted, CODE_GENERATION_MREASON, count, sizeof(inter_symbol *));
}

§4. Sorting by annotation value. We stopped doing this in March 2022, but the function seems worth keeping around. It provides a criterion for sorting a list of symbols by looking at the values of a numerical annotation, with symbols not having that annotation pushed to the back.

/* A comparison of two array entries, each a pointer to an inter_symbol, by the
   value of the numerical annotation annot as reported by
   CodeGen::sequence_number. Ties are broken using InterSymbol::sort_number.
   A NULL entry in the array is an internal error. */
int CodeGen::in_annotation_order(const void *elem1, const void *elem2, inter_ti annot) {
    const inter_symbol *first = *((const inter_symbol **) elem1);
    const inter_symbol *second = *((const inter_symbol **) elem2);
    if ((first == NULL) || (second == NULL))
        internal_error("Disaster while sorting kinds");

    int first_seq = CodeGen::sequence_number(first, annot);
    int second_seq = CodeGen::sequence_number(second, annot);
    if (first_seq == second_seq)
        return InterSymbol::sort_number(first) - InterSymbol::sort_number(second);
    return first_seq - second_seq;
}
/* The value of the numerical annotation annot for the given symbol, if that
   value is non-negative; otherwise 100000000, a large value which pushes such
   symbols to the back of a sorted list. */
int CodeGen::sequence_number(const inter_symbol *kind_name, inter_ti annot) {
    int value = SymbolAnnotation::get_i(kind_name, annot);
    return (value >= 0)?value:100000000;
}

§5. Sorting by metadata value. The preferred alternative is to sort by the value of a metadata key present in the same packages as the symbols:

/* A qsort-compatible comparison function: CodeGen::in_md_order applied with
   the metadata key "^source_order". */
int CodeGen::in_source_md_order(const void *elem1, const void *elem2) {
    return CodeGen::in_md_order(elem1, elem2, I"^source_order");
}
/* A qsort-compatible comparison function: CodeGen::in_md_order applied with
   the metadata key "^declaration_order". */
int CodeGen::in_declaration_md_order(const void *elem1, const void *elem2) {
    return CodeGen::in_md_order(elem1, elem2, I"^declaration_order");
}
/* Compares two array entries, each a pointer to an inter_symbol, by the value
   of the numeric metadata key found in each symbol's package (as reported by
   CodeGen::md_number). Ties are broken using InterSymbol::sort_number.
   Returns a negative number, zero or a positive number, as qsort requires.
   A NULL entry in the array is an internal error.

   The result is computed by explicit comparison rather than by subtraction:
   the metadata values are cast from arbitrary numeric data, so a difference
   could overflow and yield a result of the wrong sign. */
int CodeGen::in_md_order(const void *elem1, const void *elem2, text_stream *key) {
    const inter_symbol **e1 = (const inter_symbol **) elem1;
    const inter_symbol **e2 = (const inter_symbol **) elem2;
    if ((*e1 == NULL) || (*e2 == NULL))
        internal_error("Disaster while sorting kinds");
    int s1 = CodeGen::md_number(*e1, key);
    int s2 = CodeGen::md_number(*e2, key);
    if (s1 != s2) return (s1 < s2)?(-1):1;
    int t1 = InterSymbol::sort_number(*e1);
    int t2 = InterSymbol::sort_number(*e2);
    return (t1 > t2) - (t1 < t2);
}
/* The numeric value of the metadata key in the package holding the given
   symbol, or 100000000 if that package has no such key, so that symbols
   lacking the key sort after those which have it. */
int CodeGen::md_number(const inter_symbol *owner_name, text_stream *key) {
    inter_package *owner = InterSymbol::package((inter_symbol *) owner_name);
    if (!(Metadata::exists(owner, key))) return 100000000;
    return (int) Metadata::read_optional_numeric(owner, key);
}

§6. Ad hoc generation. This module would be more elegant if the following function did not exist. But it is a consequence of the (+ ... +) feature of Inform, which plunges right through all kinds of conceptual barriers better left unplunged-through. Happily, it's both limited and little-used. The task is to turn a single Inter value-pair into the text of an expression which will represent it at run-time.

This is called by Inform 7 during a drastically earlier phase of compilation, long before the final module would otherwise be involved, and there's no question of performing a full generation of an entire Inter tree. So we make a sort of mock-generation object, the ad_hoc_generation, just for the purpose of this function call. We could make a new mock object every time, because there aren't such a lot of calls to this function, but instead we make just one and re-use it.

The mock generator makes no use of segmentation (see below) except for the single temporary segment, which is set to OUT.

/* The single mock generation, created on the first call to
   CodeGen::val_to_text and re-used by every later call. */
code_generation *ad_hoc_generation = NULL;

/* Writes to OUT the text of an expression representing the value pair val,
   as it should appear in code generated for the virtual machine VM.
     IBM - bookmark whose scope is used to resolve symbolic values
     val - the Inter value pair to translate
     VM  - the target VM, which must not be NULL
   Throws an internal error if VM is NULL or if no generator exists for it.
   These checks are made on every call, not just the one creating the mock
   generation, so that a bad VM is always reported as such. */
void CodeGen::val_to_text(OUTPUT_STREAM, inter_bookmark *IBM, inter_pair val, target_vm *VM) {
    if (VM == NULL) internal_error("no VM given");
    code_generator *generator = Generators::find_for(VM);
    if (generator == NULL) internal_error("VM family with no generator");

    if (ad_hoc_generation == NULL)
        ad_hoc_generation =
            CodeGen::new_generation(NULL, NULL, NULL, InterBookmark::tree(IBM),
                NULL, generator, VM, TRUE);

    /* the VM may differ from call to call, so retarget the mock generation */
    /* NOTE(review): the tree recorded at creation is not refreshed here, which
       presumably relies on all calls referring to the same tree - confirm */
    ad_hoc_generation->for_VM = VM;
    ad_hoc_generation->generator = generator;

    CodeGen::select_temporary(ad_hoc_generation, OUT);
    CodeGen::pair_at_bookmark(ad_hoc_generation, IBM, val);
    CodeGen::deselect_temporary(ad_hoc_generation);
}

§7. Literal text modes. There are three of these. PRINTING_LTM is used when text is needed only immediately as an operand for, say, !print and will therefore never be a value at runtime; BOX_LTM for "quotation box" text; REGULAR_LTM for everything else.

define REGULAR_LTM 0
define BOX_LTM 1
define PRINTING_LTM 2
/* Sets the literal text mode of the generation to m, which is expected to be
   one of REGULAR_LTM, BOX_LTM or PRINTING_LTM. No check is made on m. */
void CodeGen::lt_mode(code_generation *gen, int m) {
    gen->literal_text_mode = m;
}

§8. Segmentation. Generators have flexibility in how they go about their business, but if they are making what amounts to a text file with a lot of internal structure then the following system may be a convenience. It allows text to be assembled as a set of "segments" which can be appended to in any order, and which are then put together in a logical order at the end.

Segments are identified by ID numbers counting up from 0, 1, 2, ...: but ID number 0 is no_I7CGS, reserved to mean "not a segment".

A segment is itself internally stratified into numbered "layers", and these are used to help generators cope with more nuanced ordering issues — e.g., where two declarations are of basically the same sort of thing, and should be in the same segment as each other; but where one must nevertheless precede the other. This can be achieved by putting the one to come first in a lower-numbered layer.

enum no_I7CGS from 0
define MAX_LAYERS_PER_SEGMENT 128
/* One segment of generated output: an array of text streams, one per layer.
   CodeGen::new_segment sets layers to MAX_LAYERS_PER_SEGMENT and gives every
   layer its own new stream; CodeGen::write_segment outputs the layers in
   ascending order of index. */
typedef struct generated_segment {
    int layers;
    struct text_stream *generated_code[MAX_LAYERS_PER_SEGMENT];
    CLASS_DEFINITION
} generated_segment;

/* Creates a new segment with the full complement of MAX_LAYERS_PER_SEGMENT
   layers, each holding a newly created text stream. */
generated_segment *CodeGen::new_segment(void) {
    generated_segment *segment = CREATE(generated_segment);
    segment->layers = MAX_LAYERS_PER_SEGMENT;
    int layer = 0;
    while (layer < segment->layers) {
        segment->generated_code[layer] = Str::new();
        layer++;
    }
    return segment;
}

§9. Each generation has its own copy of every possible numbered segment, though by default those are NULL.

/* The segmentation state belonging to one generation. The segments array is
   indexed by segment ID, with NULL entries for IDs not in use; the two
   sequence lists record the order in which segments are to be written out.
   While temporarily_diverted is set, CodeGen::current returns
   temporarily_diverted_to rather than the stream identified by pos. */
typedef struct segmentation_data {
    struct generated_segment *segments[NO_DEFINED_I7CGS_VALUES];
    struct linked_list *segment_sequence;  of generated_segment
    struct linked_list *additional_segment_sequence;  of generated_segment
    struct text_stream *temporarily_diverted_to;
    int temporarily_diverted;  to the temporary segment
    struct segmentation_pos pos;
} segmentation_data;

/* A write position: a segment together with a layer inside it. Values of this
   type are returned by CodeGen::select and CodeGen::select_layered so that the
   previous position can later be restored with CodeGen::deselect. */
typedef struct segmentation_pos {
    struct generated_segment *current_segment;  the one currently being written to
    int current_layer;  within that segment: in the range 0 to current_segment->layers - 1
} segmentation_pos;

/* Returns segmentation data in its initial state: no segments exist, both
   sequence lists are empty, no temporary diversion is in force, and the
   current position is layer 1 of no segment at all. */
segmentation_data CodeGen::new_segmentation_data(void) {
    segmentation_data fresh;

    /* no segment IDs are in use until the generator asks for some */
    int id = 0;
    while (id < NO_DEFINED_I7CGS_VALUES) {
        fresh.segments[id] = NULL;
        id++;
    }
    fresh.segment_sequence = NEW_LINKED_LIST(generated_segment);
    fresh.additional_segment_sequence = NEW_LINKED_LIST(generated_segment);

    /* nothing is selected, and there is no diversion */
    fresh.pos.current_segment = NULL;
    fresh.pos.current_layer = 1;
    fresh.temporarily_diverted_to = NULL;
    fresh.temporarily_diverted = FALSE;

    return fresh;
}

§10. If a generator wants to use this system, it should call CodeGen::create_segments to say which segments it wants to be created, passing an array of ID numbers. The order of these is significant — it's the order in which they will appear in the final output.

/* Creates the main set of segments for a generation. The array codes holds
   segment IDs in the order in which the segments are to be output, and is
   terminated by a negative entry. An ID which is no_I7CGS, or which is
   NO_DEFINED_I7CGS_VALUES or more, causes an internal error. The generator's
   private data pointer is also recorded in the generation. */
void CodeGen::create_segments(code_generation *gen, void *data, int codes[]) {
    linked_list *sequence = NEW_LINKED_LIST(generated_segment);
    gen->segmentation.segment_sequence = sequence;
    int i = 0;
    while (codes[i] >= 0) {
        int id = codes[i];
        if ((id == no_I7CGS) || (id >= NO_DEFINED_I7CGS_VALUES))
            internal_error("bad segment sequence");
        generated_segment *seg = CodeGen::new_segment();
        gen->segmentation.segments[id] = seg;
        ADD_TO_LINKED_LIST(seg, generated_segment, sequence);
        i++;
    }
    gen->generator_private_data = data;
}

§11. An optional "additional" set of segments can also be created.

/* Creates the additional set of segments for a generation. The array codes
   holds segment IDs in the order in which the segments are to be output, and
   is terminated by a negative entry. An ID which is no_I7CGS, or which is
   NO_DEFINED_I7CGS_VALUES or more, causes an internal error. */
void CodeGen::additional_segments(code_generation *gen, int codes[]) {
    linked_list *sequence = NEW_LINKED_LIST(generated_segment);
    gen->segmentation.additional_segment_sequence = sequence;
    int i = 0;
    while (codes[i] >= 0) {
        int id = codes[i];
        if ((id == no_I7CGS) || (id >= NO_DEFINED_I7CGS_VALUES))
            internal_error("bad segment sequence");
        generated_segment *seg = CodeGen::new_segment();
        gen->segmentation.segments[id] = seg;
        ADD_TO_LINKED_LIST(seg, generated_segment, sequence);
        i++;
    }
}

§12. At any given time, a generation has a "current" segment, to which output is being written. The generator should use CodeGen::select to switch to a given segment, which must be one of those it has created, and then use CodeGen::deselect to go back to where it was. These calls must be made in properly nested pairs.

At some point we may want to make the cap on the number of layers flexible, but for now about 10 layers is plenty.

/* Makes layer 1 of the segment with ID i the current write position, and
   returns the previous position so that it can be passed to CodeGen::deselect
   later. See CodeGen::select_layered for the errors which can arise. */
segmentation_pos CodeGen::select(code_generation *gen, int i) {
    return CodeGen::select_layered(gen, i, 1);
}

/* Makes the given layer of the segment with ID i the current write position,
   and returns the previous position so that it can be passed to
   CodeGen::deselect later.
   Throws an internal error if a temporary diversion is in force; if i is not
   a valid segment ID; if the generator has not created segment i; or if layer
   lies outside the range 0 to one less than the segment's number of layers.
   The lower bound matters because the layer is later used, unchecked, as an
   array index by CodeGen::current. */
segmentation_pos CodeGen::select_layered(code_generation *gen, int i, int layer) {
    segmentation_pos previous_pos = gen->segmentation.pos;
    if (gen->segmentation.temporarily_diverted) internal_error("poorly timed selection");
    if ((i < 0) || (i >= NO_DEFINED_I7CGS_VALUES)) internal_error("out of range");
    if (gen->segmentation.segments[i] == NULL)
        internal_error("generator does not use this segment ID");
    if (layer < 0) internal_error("negative layer");
    if (layer >= gen->segmentation.segments[i]->layers)
        internal_error("too many layers");
    gen->segmentation.pos.current_segment = gen->segmentation.segments[i];
    gen->segmentation.pos.current_layer = layer;
    return previous_pos;
}

/* Restores the write position saved, which should be the value returned by
   the matching call to CodeGen::select or CodeGen::select_layered. Throws an
   internal error if a temporary diversion is in force. */
void CodeGen::deselect(code_generation *gen, segmentation_pos saved) {
    if (gen->segmentation.temporarily_diverted) internal_error("poorly timed deselection");
    gen->segmentation.pos = saved;
}

§13. However, we can also temporarily divert the whole system to send its text to some temporary stream somewhere. For that, use the following pair:

/* Begins a temporary diversion, so that CodeGen::current returns the stream T
   until CodeGen::deselect_temporary is called. Diversions cannot be nested:
   an internal error is thrown if one is already in force. */
void CodeGen::select_temporary(code_generation *gen, text_stream *T) {
    if (gen->segmentation.temporarily_diverted) internal_error("nested temporary segments");
    gen->segmentation.temporarily_diverted_to = T;
    gen->segmentation.temporarily_diverted = TRUE;
}

/* Ends a temporary diversion, clearing both the flag and the stream it was
   diverted to. No check is made that a diversion was actually in force. */
void CodeGen::deselect_temporary(code_generation *gen) {
    gen->segmentation.temporarily_diverted_to = NULL;
    gen->segmentation.temporarily_diverted = FALSE;
}

§14. The following returns the text stream a generator should write to. Note that if it has been "temporarily diverted" then the regular selection is ignored.

/* Returns the stream to which generated text should currently be written:
   the diversion stream if a temporary diversion is in force; otherwise the
   stream for the current layer of the current segment; or NULL if no segment
   has been selected. */
text_stream *CodeGen::current(code_generation *gen) {
    segmentation_data *sd = &(gen->segmentation);
    if (sd->temporarily_diverted) return sd->temporarily_diverted_to;

    generated_segment *seg = sd->pos.current_segment;
    if (seg) return seg->generated_code[sd->pos.current_layer];
    return NULL;
}

§15. And then all we do is concatenate them in order:

/* Writes to OUT the contents of every segment in the generation's main
   sequence, in sequence order. */
void CodeGen::write_segments(OUTPUT_STREAM, code_generation *gen) {
    linked_list *sequence = gen->segmentation.segment_sequence;
    generated_segment *next_seg;
    LOOP_OVER_LINKED_LIST(next_seg, generated_segment, sequence) {
        CodeGen::write_segment(OUT, next_seg);
    }
}

/* Writes to OUT the contents of every segment in the generation's additional
   sequence, in sequence order. */
void CodeGen::write_additional_segments(OUTPUT_STREAM, code_generation *gen) {
    linked_list *sequence = gen->segmentation.additional_segment_sequence;
    generated_segment *next_seg;
    LOOP_OVER_LINKED_LIST(next_seg, generated_segment, sequence) {
        CodeGen::write_segment(OUT, next_seg);
    }
}

/* Writes to OUT the text held in each layer of the segment, from layer 0
   upwards. */
void CodeGen::write_segment(OUTPUT_STREAM, generated_segment *seg) {
    int layer = 0;
    while (layer < seg->layers) {
        WRITE("%S", seg->generated_code[layer]);
        layer++;
    }
}

§16. Transients. Transient flags on symbols are used temporarily during code generation, but do not change the meaning of the tree: they're just a way to keep track of, say, what we've worked on so far.

/* Traverses the tree I, restricted to PACKAGE_IST instructions, calling
   CodeGen::clear_transients on each one. */
void CodeGen::clear_all_transients(inter_tree *I) {
    InterTree::traverse(I, CodeGen::clear_transients, NULL, NULL, PACKAGE_IST);
}

/* Visitor function used by CodeGen::clear_all_transients: clears the transient
   flags of every symbol in the scope of the package whose head is at P. The
   state argument is not used. */
void CodeGen::clear_transients(inter_tree *I, inter_tree_node *P, void *state) {
    inter_symbols_table *symbols =
        InterPackage::scope(PackageInstruction::at_this_head(P));
    LOOP_OVER_SYMBOLS_TABLE(S, symbols) {
        InterSymbol::clear_transient_flags(S);
    }
}

§17. In particular the TRAVERSE_MARK_ISYMF flag is sometimes convenient to use.

/* Returns the state of the TRAVERSE_MARK_ISYMF flag of the symbol, as
   reported by InterSymbol::get_flag. */
int CodeGen::marked(inter_symbol *symb_name) {
    return InterSymbol::get_flag(symb_name, TRAVERSE_MARK_ISYMF);
}

/* Sets the TRAVERSE_MARK_ISYMF flag of the symbol. */
void CodeGen::mark(inter_symbol *symb_name) {
    InterSymbol::set_flag(symb_name, TRAVERSE_MARK_ISYMF);
}

/* Clears the TRAVERSE_MARK_ISYMF flag of the symbol. */
void CodeGen::unmark(inter_symbol *symb_name) {
    InterSymbol::clear_flag(symb_name, TRAVERSE_MARK_ISYMF);
}

§18. Value pairs. We will very often need to compile an expression from a value pair val extracted from some Inter instruction.

/* Generates code for the value pair val, resolving any symbol in it using the
   scope of the bookmark IBM, or using no symbols table at all if IBM is NULL.
   The paragraph invoked below makes use of the local variables gen, T and
   val. */
void CodeGen::pair_at_bookmark(code_generation *gen, inter_bookmark *IBM, inter_pair val) {
    inter_symbols_table *T = IBM?(InterBookmark::scope(IBM)):NULL;
    Generate from a value pair18.1;
}

/* Generates code for the value pair val, resolving any symbol in it using the
   scope which InterPackage::scope_of gives for the node P, or using no symbols
   table at all if P is NULL. The paragraph invoked below makes use of the
   local variables gen, T and val. */
void CodeGen::pair(code_generation *gen, inter_tree_node *P, inter_pair val) {
    inter_symbols_table *T = P?(InterPackage::scope_of(P)):NULL;
    Generate from a value pair18.1;
}

§18.1. Generate from a value pair18.1 =

    /* Expects gen, T and val to be in scope where this paragraph is used. The
       pair is dispatched on its format: each kind of literal is passed to the
       corresponding Generators function, except that a glob is written out
       directly as text to the current stream. A symbolic value which cannot
       be resolved in T, or a pair of any other format, is an internal error. */
    inter_tree *I = gen->from;
    text_stream *OUT = CodeGen::current(gen);
    if (InterValuePairs::is_symbolic(val)) {
        inter_symbol *s = InterValuePairs::to_symbol(val, T);
        if (s == NULL) internal_error("bad symbol in Inter pair");
        Generators::compile_literal_symbol(gen, s);
    } else if (InterValuePairs::is_number(val)) {
        Generators::compile_literal_number(gen, InterValuePairs::to_number(val), FALSE);
    } else if (InterValuePairs::is_real(val)) {
        Generators::compile_literal_real(gen, InterValuePairs::to_textual_real(I, val));
    } else if (InterValuePairs::is_singular_dword(val)) {
        Generators::compile_dictionary_word(gen, InterValuePairs::to_dictionary_word(I, val), FALSE);
    } else if (InterValuePairs::is_plural_dword(val)) {
        Generators::compile_dictionary_word(gen, InterValuePairs::to_dictionary_word(I, val), TRUE);
    } else if (InterValuePairs::is_text(val)) {
        Generators::compile_literal_text(gen, InterValuePairs::to_text(I, val), TRUE);
    } else if (InterValuePairs::is_glob(val)) {
        WRITE("%S", InterValuePairs::to_glob_text(I, val));
    } else {
        internal_error("unimplemented data pair format");
    }