How arrays of all kinds are stored in C.


§1. Setting up the model.

/* Registers the memory model's handlers with the code-generation target cgt.
   Each METHOD_ADD call pairs one method type ID with the CMemoryModel
   function which implements it. */
void CMemoryModel::initialise(code_generation_target *cgt) {
    METHOD_ADD(cgt, BEGIN_ARRAY_MTID, CMemoryModel::begin_array);
    METHOD_ADD(cgt, ARRAY_ENTRY_MTID, CMemoryModel::array_entry);
    METHOD_ADD(cgt, COMPILE_LITERAL_SYMBOL_MTID, CMemoryModel::compile_literal_symbol);
    METHOD_ADD(cgt, ARRAY_ENTRIES_MTID, CMemoryModel::array_entries);
    METHOD_ADD(cgt, END_ARRAY_MTID, CMemoryModel::end_array);
}

typedef struct C_generation_memory_model_data {
    int himem;  high point of memory: 1 more than the largest legal address
    struct text_stream *array_name;
    int entry_count;
    int next_node_is_a_ref;
} C_generation_memory_model_data;

/* Puts the memory-model state for the generation gen into its starting
   condition, before any array has been compiled. */
void CMemoryModel::initialise_data(code_generation *gen) {
    /* No array is under construction yet: an empty name and no entries. */
    C_GEN_DATA(memdata.array_name) = Str::new();
    C_GEN_DATA(memdata.entry_count) = 0;

    /* Nothing has been placed in memory, so the high point is address 0. */
    C_GEN_DATA(memdata.himem) = 0;
    C_GEN_DATA(memdata.next_node_is_a_ref) = FALSE;
}

§2. Byte-addressable memory. The Inter semantics require that there be an area of byte-accessible memory:

We will manage that with a single C array. This is first predeclared here:

/* Predeclaration only, so that code can refer to i7mem before its contents
   are known: the initialised array itself is written out by
   CMemoryModel::begin and CMemoryModel::end. */
i7byte i7mem[];

§3. Declaring that array is our main task in this section.

/* Writes the opening line of the i7mem array initialiser into the
   c_mem_I7CGS segment, then restores whichever segment was selected before. */
void CMemoryModel::begin(code_generation *gen) {
    generated_segment *previously_selected = CodeGen::select(gen, c_mem_I7CGS);

    /* WRITE sends its text to the stream called OUT. */
    text_stream *OUT = CodeGen::current(gen);
    WRITE("i7byte i7mem[] = {\n");

    CodeGen::deselect(gen, previously_selected);
}

§4. We will end the array with two dummy bytes (which should never be accessed) just in case, and to ensure that it is never empty, which would be illegal in C.

/* Finishes the i7mem array and records how much of it is legal memory.
   Two things are written, each to its own segment:
   (a) the two dummy bytes and closing brace of the array initialiser;
   (b) the constant i7_himem, set to the current high point of memory. */
void CMemoryModel::end(code_generation *gen) {
    /* (a) Close the initialiser in the memory segment. */
    generated_segment *before_mem = CodeGen::select(gen, c_mem_I7CGS);
    text_stream *OUT = CodeGen::current(gen);
    WRITE("0, 0 };\n");
    CodeGen::deselect(gen, before_mem);

    /* (b) Define i7_himem. The dummy bytes are not counted, since himem
       is not advanced when they are written. */
    generated_segment *before_maxima = CodeGen::select(gen, c_ids_and_maxima_I7CGS);
    OUT = CodeGen::current(gen);
    WRITE("#define i7_himem %d\n", C_GEN_DATA(memdata.himem));
    CodeGen::deselect(gen, before_maxima);
}

§5. Reading and writing memory. Given the above array, it's easy to read and write bytes: if a is the address then we can simply refer to i7mem[a]. Words are more challenging since we need to pack and unpack them.

The following function reads a word which is in entry array_index (counting 0, 1, 2, ...) in the array which begins at the byte address array_address in the bank of memory data. In practice, we will only ever use this function with data set to i7mem.

The equivalent for reading a byte entry is data[array_address + array_index].

/* Reads the word stored at entry array_index (counting from 0) of the word
   array which begins at byte address array_address in data. The four bytes
   are combined most significant first. If any of them would lie outside
   the legal addresses 0 to i7_himem - 1, an error is printed and
   i7_fatal_exit is called. */
i7val i7_read_word(i7byte data[], i7val array_address, i7val array_index) {
    int byte_position = array_address + 4*array_index;
    /* The word occupies byte_position to byte_position + 3, and every one of
       those must be a legal address. Testing only the first byte would let a
       word straddling the top of memory be read from beyond the array. */
    if ((byte_position < 0) || (byte_position > i7_himem - 4)) {
        printf("Memory access out of range: %d\n", byte_position);
        i7_fatal_exit();
    }
    /* Assemble the word in unsigned arithmetic: multiplying a signed value by
       0x1000000 overflows whenever the top byte is 0x80 or more. */
    unsigned int word =
        (((unsigned int) data[byte_position + 0]) << 24) |
        (((unsigned int) data[byte_position + 1]) << 16) |
        (((unsigned int) data[byte_position + 2]) << 8)  |
         ((unsigned int) data[byte_position + 3]);
    return (i7val) word;
}

§6. Packing, unlike unpacking, is done with macros so that it is possible to express a packed word in constant context, which we will need later.

/* Each macro extracts one byte of a 32-bit word V, with I7BYTE_0 the most
   significant byte and I7BYTE_3 the least. Only constant-foldable arithmetic
   is used, so the result is a constant expression whenever V is one.
   V is parenthesised so that an argument built with an operator of lower
   precedence than & (for example a | b) is masked as a whole, rather than
   having only its last operand masked. */
#define I7BYTE_0(V) (((V) & 0xFF000000) >> 24)
#define I7BYTE_1(V) (((V) & 0x00FF0000) >> 16)
#define I7BYTE_2(V) (((V) & 0x0000FF00) >> 8)
#define I7BYTE_3(V)  ((V) & 0x000000FF)

/* Changes the word stored at entry array_index (counting from 0) of the word
   array which begins at byte address array_address in data, and returns the
   value of the assignment expression. The argument way selects the kind of
   assignment: for the six cases below the stored value is derived from the
   old one, and for any other way new_val is simply stored and returned.
   If any of the word's four bytes would lie outside the legal addresses
   0 to i7_himem - 1, an error is printed and i7_fatal_exit is called. */
i7val i7_write_word(i7byte data[], i7val array_address, i7val array_index, i7val new_val, int way) {
    /* Validate all four bytes before memory is touched at all: the word
       occupies byte_position to byte_position + 3. */
    int byte_position = array_address + 4*array_index;
    if ((byte_position < 0) || (byte_position > i7_himem - 4)) {
        printf("Memory access out of range: %d\n", byte_position);
        i7_fatal_exit();
    }

    i7val old_val = i7_read_word(data, array_address, array_index);
    i7val return_val = new_val;
    switch (way) {
        /* Pre- forms return the updated value, post- forms the original. */
        case i7_lvalue_PREDEC:   return_val = old_val-1;   new_val = old_val-1; break;
        case i7_lvalue_POSTDEC:  return_val = old_val; new_val = old_val-1; break;
        case i7_lvalue_PREINC:   return_val = old_val+1;   new_val = old_val+1; break;
        case i7_lvalue_POSTINC:  return_val = old_val; new_val = old_val+1; break;
        /* Here new_val arrives as a bit mask to set or clear in the old value. */
        case i7_lvalue_SETBIT:   new_val = old_val | new_val; return_val = new_val; break;
        case i7_lvalue_CLEARBIT: new_val = old_val &(~new_val); return_val = new_val; break;
    }

    /* Store most significant byte first, matching i7_read_word. */
    data[byte_position]   = I7BYTE_0(new_val);
    data[byte_position+1] = I7BYTE_1(new_val);
    data[byte_position+2] = I7BYTE_2(new_val);
    data[byte_position+3] = I7BYTE_3(new_val);
    return return_val;
}

§7. "Short" 16-bit numbers can also be accessed:

/* Reads the 16-bit value held in the two bytes of i7mem starting at address
   x + 2*y, most significant byte first, and stores it in *z. Nothing is done
   if z is null. If either byte would lie outside the legal addresses
   0 to i7_himem - 1, an error is printed and i7_fatal_exit is called, as
   i7_read_word does for words. */
void glulx_aloads(i7val x, i7val y, i7val *z) {
    if (!z) return;
    int byte_position = x + 2*y;
    /* Both bytes, byte_position and byte_position + 1, must be legal. */
    if ((byte_position < 0) || (byte_position > i7_himem - 2)) {
        printf("Memory access out of range: %d\n", byte_position);
        i7_fatal_exit();
    }
    *z = 0x100*((i7val) i7mem[byte_position]) + ((i7val) i7mem[byte_position + 1]);
}

§8. A Glulx assembly opcode is provided for fast memory copies:

/* Copies x bytes of i7mem from address y to address z. The two blocks may
   overlap. Nothing is done if x is zero or negative. If either block would
   extend outside the legal addresses 0 to i7_himem - 1, an error is printed
   and i7_fatal_exit is called before any byte is written. */
void glulx_mcopy(i7val x, i7val y, i7val z) {
    if (x <= 0) return;

    /* With x > 0, a block starting at a is legal only if a + x <= i7_himem;
       this is written as a subtraction so that the sum cannot overflow. */
    if ((y < 0) || (y > i7_himem - x)) {
        printf("Memory access out of range: %d\n", (int) y);
        i7_fatal_exit();
    }
    if ((z < 0) || (z > i7_himem - x)) {
        printf("Memory access out of range: %d\n", (int) z);
        i7_fatal_exit();
    }

    /* Choose the direction so that no source byte is overwritten before it
       has been copied: forwards when the destination lies below the source,
       backwards otherwise. */
    if (z < y)
        for (i7val i=0; i<x; i++) i7mem[z+i] = i7mem[y+i];
    else
        for (i7val i=x-1; i>=0; i--) i7mem[z+i] = i7mem[y+i];
}

/* Placeholder: not implemented. Whatever the arguments, which are ignored,
   it prints a message and calls i7_fatal_exit. */
void glulx_malloc(i7val x, i7val y) {
    printf("Unimplemented: glulx_malloc.\n");
    i7_fatal_exit();
}

/* Placeholder: not implemented. Whatever the argument, which is ignored,
   it prints a message and calls i7_fatal_exit. */
void glulx_mfree(i7val x) {
    printf("Unimplemented: glulx_mfree.\n");
    i7_fatal_exit();
}

§9. Populating memory with arrays. Inter supports four sorts of arrays, with behaviour as laid out in this 2x2 grid:

             | entries count 0, 1, 2,...     | entry 0 is N, then entries count 1, 2, ..., N
-------------+-------------------------------+-----------------------------------------------
byte entries | BYTE_ARRAY_FORMAT             | BUFFER_ARRAY_FORMAT
-------------+-------------------------------+-----------------------------------------------
word entries | WORD_ARRAY_FORMAT             | TABLE_ARRAY_FORMAT
-------------+-------------------------------+-----------------------------------------------
/* Begins compiling the array called array_name, in the given format.
   Returns FALSE if the array was passed whole to CLiteralsModel::verb_grammar
   because its symbol array_s carries the VERBARRAY_IANN annotation, and
   TRUE otherwise. */
int CMemoryModel::begin_array(code_generation_target *cgt, code_generation *gen,
    text_stream *array_name, inter_symbol *array_s, inter_tree_node *P, int format) {
    /* Remember the name, and start counting entries afresh: both are needed
       again in CMemoryModel::end_array. */
    Str::clear(C_GEN_DATA(memdata.array_name));
    WRITE_TO(C_GEN_DATA(memdata.array_name), "%S", array_name);
    C_GEN_DATA(memdata.entry_count) = 0;

    if ((array_s) && (Inter::Symbols::read_annotation(array_s, VERBARRAY_IANN) == 1)) {
        CLiteralsModel::verb_grammar(cgt, gen, array_s, P);
        return FALSE;
    }

    /* Used only in the comment written alongside the array's address. */
    text_stream *format_name = I"unknown";
    Work out the format name9.1;
    Define a constant for the byte address in memory where the array begins9.2;
    /* Only these two formats hold their extent N in entry 0. */
    if ((format == TABLE_ARRAY_FORMAT) || (format == BUFFER_ARRAY_FORMAT))
        Place the extent entry N at index 09.3;
    return TRUE;
}

§9.1. Work out the format name9.1 =

    switch (format) {
        case BYTE_ARRAY_FORMAT: format_name = I"byte"; break;
        case WORD_ARRAY_FORMAT: format_name = I"word"; break;
        case BUFFER_ARRAY_FORMAT: format_name = I"buffer"; break;
        case TABLE_ARRAY_FORMAT: format_name = I"table"; break;
    }

§9.2. Crucially, the array names are #define constants declared up at the top of the source code: they are not variables with pointer types, or something like that. This means they can legally be used as values elsewhere in i7mem, or as initial values of variables, and so on.

Object, class and function names can also legally appear as array entries, because they too are defined constants, equal to their IDs: see C Object Model.

Define a constant for the byte address in memory where the array begins9.2 =

    /* The definition goes into the predeclarations segment, and the segment
       selected beforehand is restored afterwards. */
    generated_segment *segment_before = CodeGen::select(gen, c_predeclarations_I7CGS);
    text_stream *OUT = CodeGen::current(gen);

    /* The constant is the mangled array name; its value is the current high
       point of memory, which is where the first entry will be placed. */
    WRITE("#define ");
    CNamespace::mangle(cgt, OUT, array_name);
    WRITE(" %d /* = position in i7mem of %S array %S */\n",
        C_GEN_DATA(memdata.himem), format_name, array_name);

    CodeGen::deselect(gen, segment_before);

§9.3. Of course, right now we don't know N, the extent of the array. So we will refer to this with a constant like xt_myarray, which we will retrospectively predefine when the array ends.

Place the extent entry N at index 09.3 =

    /* Write the name xt_ARRAYNAME as the array's first entry, in place of a
       number: CMemoryModel::end_array later defines a constant of that name. */
    TEMPORARY_TEXT(extname)
    WRITE_TO(extname, "xt_%S", array_name);
    CMemoryModel::array_entry(cgt, gen, extname, format);
    DISCARD_TEXT(extname)

§10. The call to CMemoryModel::begin_array is then followed by a series of calls to:

/* Appends one entry to the i7mem initialiser in the c_mem_I7CGS segment.
   The text entry is copied into the generated code as written. Table and
   word arrays take a four-byte entry, while any other format takes a
   one-byte entry. The current array's entry count goes up by one either way. */
void CMemoryModel::array_entry(code_generation_target *cgt, code_generation *gen,
    text_stream *entry, int format) {
    generated_segment *saved = CodeGen::select(gen, c_mem_I7CGS);
    text_stream *OUT = CodeGen::current(gen);
    if ((format == TABLE_ARRAY_FORMAT) || (format == WORD_ARRAY_FORMAT))
        This is a word entry10.2
    else
        This is a byte entry10.1;
    CodeGen::deselect(gen, saved);
    C_GEN_DATA(memdata.entry_count)++;
}

§10.1. This is a byte entry10.1 =

    /* One byte: the entry is cast to i7byte, with its address in i7mem noted
       in a comment, and the high point of memory moves up by 1. */
    WRITE("    (i7byte) %S, /* %d */\n", entry, C_GEN_DATA(memdata.himem));
    C_GEN_DATA(memdata.himem) += 1;

§10.2. Now we see why it was important for I7BYTE_0 and so on to be macros: they use only arithmetic operations which can be constant-folded by the C compiler, and therefore if X is a valid constant-context expression in C then so is I7BYTE_0(X).

This is a word entry10.2 =

    /* Four bytes: the same entry text is given to each of the I7BYTE macros
       in turn, with the address of the first byte noted in a comment, and
       the high point of memory moves up by 4. */
    WRITE("    I7BYTE_0(%S), I7BYTE_1(%S), I7BYTE_2(%S), I7BYTE_3(%S), /* %d */\n",
        entry, entry, entry, entry, C_GEN_DATA(memdata.himem));
    C_GEN_DATA(memdata.himem) += 4;

§11.

/* Compiles a literal reference to the symbol aliased: the symbol's name is
   written to the current output stream in mangled form. The arguments cgt
   and unsub are not used. */
void CMemoryModel::compile_literal_symbol(code_generation_target *cgt, code_generation *gen, inter_symbol *aliased, int unsub) {
    text_stream *destination = CodeGen::current(gen);
    CodeGen::Targets::mangle(gen, destination, CodeGen::CL::name(aliased));
}

§12. Alternatively, we can just specify how many entries there will be: they will then be initialised to 0.

/* Appends how_many entries to the current array, each with the value 0.
   If plus_ips is set, 64 further entries are added.
   NOTE(review): what plus_ips stands for, and why 64, is not visible here. */
void CMemoryModel::array_entries(code_generation_target *cgt, code_generation *gen,
    int how_many, int plus_ips, int format) {
    int total = (plus_ips) ? (how_many + 64) : how_many;
    int written = 0;
    while (written < total) {
        CMemoryModel::array_entry(cgt, gen, I"0", format);
        written++;
    }
}

§13. When all the entries have been placed, the following is called. It does nothing except to predeclare the extent constant, if one was used.

/* Completes the current array. For table and buffer arrays this defines the
   constant xt_ARRAYNAME in the predeclarations segment; for any other format
   nothing is done. */
void CMemoryModel::end_array(code_generation_target *cgt, code_generation *gen, int format) {
    /* Only these two formats placed an extent entry when they began. */
    if ((format != TABLE_ARRAY_FORMAT) && (format != BUFFER_ARRAY_FORMAT)) return;

    generated_segment *segment_before = CodeGen::select(gen, c_predeclarations_I7CGS);
    text_stream *OUT = CodeGen::current(gen);

    /* The extent entry was itself counted as an entry, so subtract it. */
    int extent = C_GEN_DATA(memdata.entry_count) - 1;
    WRITE("#define xt_%S %d\n", C_GEN_DATA(memdata.array_name), extent);

    CodeGen::deselect(gen, segment_before);
}

§14. Primitives for byte and word lookup. The signatures here are:

primitive !lookup val val -> val
primitive !lookupbyte val val -> val
/* Returns TRUE if the node ref is a reference to the LOOKUP_BIP primitive,
   as judged by CodeGen::CL::node_is_ref_to, and FALSE if not. */
int CMemoryModel::handle_store_by_ref(code_generation *gen, inter_tree_node *ref) {
    return (CodeGen::CL::node_is_ref_to(gen->from, ref, LOOKUP_BIP)) ? TRUE : FALSE;
}

/* Compiles an invocation of the primitive bip at node P, if it is one of the
   two lookup primitives. Returns NOT_APPLICABLE for any other primitive, and
   FALSE once a lookup has been compiled. */
int CMemoryModel::compile_primitive(code_generation *gen, inter_ti bip, inter_tree_node *P) {
    text_stream *OUT = CodeGen::current(gen);
    switch (bip) {
        /* A word lookup compiles differently according to whether it is
           being used as a reference or for its value. */
        case LOOKUP_BIP:     if (CReferences::am_I_a_ref(gen)) Word value as reference14.2
                             else Word value as value14.1;
                             break;
        case LOOKUPBYTE_BIP: Byte value as value14.3; break;
        default:             return NOT_APPLICABLE;
    }
    return FALSE;
}

§14.1. Word value as value14.1 =

    /* A complete call to i7_read_word, with the two operands as the array
       address and the index. */
    WRITE("i7_read_word(i7mem, "); INV_A1; WRITE(", "); INV_A2; WRITE(")");

§14.2. Word value as reference14.2 =

    /* Only the start of a call to i7_write_word: the text ends after the
       comma following the index, with no closing bracket. Presumably the
       remaining arguments are written by whatever compiles the store:
       TODO confirm. */
    WRITE("i7_write_word(i7mem, "); INV_A1; WRITE(", "); INV_A2; WRITE(", ");

§14.3. Byte value as value14.3 =

    /* A byte is read by indexing i7mem directly at the sum of the two
       operands; the generated expression performs no range check. */
    WRITE("i7mem["); INV_A1; WRITE(" + "); INV_A2; WRITE("]");