How arrays of all kinds are stored in C.


§1. Setting up the model.

/* Attaches this section's array-compilation functions to the generator |cgt|,
   one METHOD_ADD call per method ID. */
void CMemoryModel::initialise(code_generator *cgt) {
    /* The life cycle of a single array: begin, entries, end. */
    METHOD_ADD(cgt, BEGIN_ARRAY_MTID, CMemoryModel::begin_array);
    METHOD_ADD(cgt, ARRAY_ENTRY_MTID, CMemoryModel::array_entry);
    METHOD_ADD(cgt, ARRAY_ENTRIES_MTID, CMemoryModel::array_entries);
    METHOD_ADD(cgt, END_ARRAY_MTID, CMemoryModel::end_array);

    /* Writing out a symbol name used as a literal. */
    METHOD_ADD(cgt, COMPILE_LITERAL_SYMBOL_MTID, CMemoryModel::compile_literal_symbol);
}

typedef struct C_generation_memory_model_data {
    int himem;  high point of memory: 1 more than the largest legal address
    struct text_stream *array_name;
    int entry_count;
    int next_node_is_a_ref;
} C_generation_memory_model_data;

/* Puts the memory-model data for |gen| into its starting state: no memory
   used yet, an empty array name, and no entries counted. */
void CMemoryModel::initialise_data(code_generation *gen) {
    /* Counters and flags first... */
    C_GEN_DATA(memdata.himem) = 0;
    C_GEN_DATA(memdata.entry_count) = 0;
    C_GEN_DATA(memdata.next_node_is_a_ref) = FALSE;

    /* ...then the text which will hold the name of the array being compiled. */
    C_GEN_DATA(memdata.array_name) = Str::new();
}

§2. Byte-addressable memory. The Inter semantics require that there be an area of byte-accessible memory:

We will manage that with a single C array.

§3. Declaring that array is our main task in this section.

/* Opens the declaration of the C array of initial memory contents, and writes
   a 64-byte header of zeros as its first entries. */
void CMemoryModel::begin(code_generation *gen) {
    const int header_size = 64; /* bytes of header at the start of memory */

    generated_segment *saved = CodeGen::select(gen, c_mem_I7CGS);
    text_stream *OUT = CodeGen::current(gen);

    WRITE("i7byte_t i7_initial_memory[] = {\n");
    int written = 0;
    while (written < header_size) {
        WRITE("0, ");
        written++;
    }
    WRITE("/* header */\n");

    /* The header occupies memory, so the high-water mark moves past it. */
    C_GEN_DATA(memdata.himem) += header_size;
    CodeGen::deselect(gen, saved);
}

§4. We will end the array with two dummy bytes (which should never be accessed) just in case, and to ensure that it is never empty, which would be illegal in C.

/* Closes the initial-memory array with two padding bytes, then records the
   final memory size as the constant i7_static_himem. */
void CMemoryModel::end(code_generation *gen) {
    text_stream *OUT = NULL;
    generated_segment *saved = NULL;

    /* Finish off the array itself. */
    saved = CodeGen::select(gen, c_mem_I7CGS);
    OUT = CodeGen::current(gen);
    WRITE("0, 0 };\n");
    CodeGen::deselect(gen, saved);

    /* The padding bytes are not counted: himem is written as it stands. */
    saved = CodeGen::select(gen, c_ids_and_maxima_I7CGS);
    OUT = CodeGen::current(gen);
    WRITE("#define i7_static_himem %d\n", C_GEN_DATA(memdata.himem));
    CodeGen::deselect(gen, saved);
}

§5.

void i7_initialise_state(i7process_t *proc);
i7byte_t i7_initial_memory[];
/* Builds the starting state of the process |proc|: a fresh copy of initial
   memory with the release number and serial code filled into the header, an
   empty stack, an empty object tree, and variables set to their initial
   values. Any memory block already held by |proc| is freed first. Halts via
   i7_fatal_exit if an allocation fails. */
void i7_initialise_state(i7process_t *proc) {
    /* Memory: allocate, then copy in the compiled initial contents. */
    if (proc->state.memory != NULL) free(proc->state.memory);
    i7byte_t *image = calloc(i7_static_himem, sizeof(i7byte_t));
    if (image == NULL) {
        printf("Memory allocation failed\n");
        i7_fatal_exit(proc);
    }
    proc->state.memory = image;
    proc->state.himem = i7_static_himem;
    for (int at=0; at<i7_static_himem; at++) image[at] = i7_initial_memory[at];

    /* Header bytes 0x34-0x35: the release number, defaulting to 1. */
    #ifdef i7_mgl_Release
    image[0x34] = I7BYTE_2(i7_mgl_Release);
    image[0x35] = I7BYTE_3(i7_mgl_Release);
    #else
    image[0x34] = I7BYTE_2(1);
    image[0x35] = I7BYTE_3(1);
    #endif

    /* Header bytes 0x36-0x3B: the six-character serial code, defaulting to
       six '0' characters. */
    #ifdef i7_mgl_Serial
    char *serial = i7_text_of_string(i7_mgl_Serial);
    for (int c=0; c<6; c++) image[0x36 + c] = serial[c];
    #else
    for (int c=0; c<6; c++) image[0x36 + c] = '0';
    #endif

    proc->state.stack_pointer = 0;

    /* Object tree: calloc returns zero-filled storage, so every parent, child
       and sibling entry starts out as 0 without further work. */
    proc->state.i7_object_tree_parent  = calloc(i7_max_objects, sizeof(i7word_t));
    proc->state.i7_object_tree_child   = calloc(i7_max_objects, sizeof(i7word_t));
    proc->state.i7_object_tree_sibling = calloc(i7_max_objects, sizeof(i7word_t));
    if ((proc->state.i7_object_tree_parent == NULL) ||
        (proc->state.i7_object_tree_child == NULL) ||
        (proc->state.i7_object_tree_sibling == NULL)) {
        printf("Memory allocation failed\n");
        i7_fatal_exit(proc);
    }

    /* Variables: allocate, then copy in the initial values. */
    proc->state.variables = calloc(i7_no_variables, sizeof(i7word_t));
    if (proc->state.variables == NULL) {
        printf("Memory allocation failed\n");
        i7_fatal_exit(proc);
    }
    for (int v=0; v<i7_no_variables; v++)
        proc->state.variables[v] = i7_initial_variable_values[v];
}

§6. Reading and writing memory. Given the above array, it's easy to read and write bytes. Words are more challenging since we need to pack and unpack them.

The following function reads a word which is in entry array_index (counting 0, 1, 2, ...) in the array which begins at the byte address array_address.

i7byte_t i7_read_byte(i7process_t *proc, i7word_t address);
i7word_t i7_read_word(i7process_t *proc, i7word_t array_address, i7word_t array_index);
/* Returns the byte held at |address| in the memory of |proc|. An address
   outside the range 0 to i7_static_himem-1 is reported and halts the process
   via i7_fatal_exit, in the same way as for word reads, rather than silently
   reading outside the memory block. */
i7byte_t i7_read_byte(i7process_t *proc, i7word_t address) {
    if ((address < 0) || (address >= i7_static_himem)) {
        printf("Memory access out of range: %d\n", (int) address);
        i7_fatal_exit(proc);
    }
    return proc->state.memory[address];
}

/* Returns the word stored as entry |array_index| (counting from 0) of the
   array beginning at byte address |array_address|. Words occupy four bytes,
   most significant byte first. An out-of-range position is reported and halts
   the process via i7_fatal_exit. */
i7word_t i7_read_word(i7process_t *proc, i7word_t array_address, i7word_t array_index) {
    i7byte_t *data = proc->state.memory;
    int byte_position = array_address + 4*array_index;
    /* The word runs from byte_position to byte_position+3, and all four of
       those bytes must lie below i7_static_himem, not merely the first. */
    if ((byte_position < 0) || (byte_position > i7_static_himem - 4)) {
        printf("Memory access out of range: %d\n", byte_position);
        i7_fatal_exit(proc);
    }
    /* Pack using unsigned arithmetic, since a top byte of 0x80 or more would
       overflow a signed multiplication by 0x1000000.
       NOTE(review): assumes i7byte_t is an unsigned 8-bit type — confirm. */
    unsigned int packed =
        (((unsigned int) data[byte_position + 0]) << 24) |
        (((unsigned int) data[byte_position + 1]) << 16) |
        (((unsigned int) data[byte_position + 2]) << 8)  |
         ((unsigned int) data[byte_position + 3]);
    return (i7word_t) packed;
}

§7. Packing, unlike unpacking, is done with macros so that it is possible to express a packed word in constant context, which we will need later.

/* I7BYTE_0(V) to I7BYTE_3(V) extract the four bytes of the word V, from most
   to least significant. The argument is bracketed so that an expression such
   as (a | b) or (c ? x : y) can be passed safely; the results remain valid in
   constant context whenever V is. */
#define I7BYTE_0(V) (((V) & 0xFF000000) >> 24)
#define I7BYTE_1(V) (((V) & 0x00FF0000) >> 16)
#define I7BYTE_2(V) (((V) & 0x0000FF00) >> 8)
#define I7BYTE_3(V)  ((V) & 0x000000FF)

void i7_write_byte(i7process_t *proc, i7word_t address, i7byte_t new_val);
i7word_t i7_write_word(i7process_t *proc, i7word_t array_address, i7word_t array_index, i7word_t new_val, int way);
/* Stores |new_val| as the byte at |address| in the memory of |proc|. An
   address outside the range 0 to i7_static_himem-1 is reported and halts the
   process via i7_fatal_exit, in the same way as for word writes, rather than
   silently writing outside the memory block. */
void i7_write_byte(i7process_t *proc, i7word_t address, i7byte_t new_val) {
    if ((address < 0) || (address >= i7_static_himem)) {
        printf("Memory access out of range: %d\n", (int) address);
        i7_fatal_exit(proc);
    }
    proc->state.memory[address] = new_val;
}

/* Changes the byte at |address| in the manner given by |way|, and returns the
   value of the change considered as an expression: the old byte for the two
   post- forms, and the newly stored byte otherwise. For any |way| not listed
   below, |new_val| is simply stored and returned. */
i7byte_t i7_change_byte(i7process_t *proc, i7word_t address, i7byte_t new_val, int way) {
    const i7byte_t before = i7_read_byte(proc, address);
    i7byte_t stored = new_val;  /* what ends up in memory */
    i7byte_t yielded = new_val; /* what the caller gets back */

    if (way == i7_lvalue_PREDEC)        { stored = before-1; yielded = stored; }
    else if (way == i7_lvalue_POSTDEC)  { stored = before-1; yielded = before; }
    else if (way == i7_lvalue_PREINC)   { stored = before+1; yielded = stored; }
    else if (way == i7_lvalue_POSTINC)  { stored = before+1; yielded = before; }
    else if (way == i7_lvalue_SETBIT)   { stored = before | new_val; yielded = stored; }
    else if (way == i7_lvalue_CLEARBIT) { stored = before &(~new_val); yielded = stored; }

    i7_write_byte(proc, address, stored);
    return yielded;
}

/* Changes the word stored as entry |array_index| (counting from 0) of the
   array beginning at byte address |array_address|, in the manner given by
   |way|, and returns the value of the change considered as an expression: the
   old word for the two post- forms, and the newly stored word otherwise. For
   any |way| not listed below, |new_val| is simply stored and returned. An
   out-of-range position is reported and halts the process via i7_fatal_exit. */
i7word_t i7_write_word(i7process_t *proc, i7word_t array_address, i7word_t array_index, i7word_t new_val, int way) {
    int byte_position = array_address + 4*array_index;
    /* The word runs from byte_position to byte_position+3, and all four of
       those bytes must lie below i7_static_himem, not merely the first. */
    if ((byte_position < 0) || (byte_position > i7_static_himem - 4)) {
        printf("Memory access out of range: %d\n", byte_position);
        i7_fatal_exit(proc);
    }
    i7byte_t *data = proc->state.memory;
    i7word_t old_val = i7_read_word(proc, array_address, array_index);
    i7word_t return_val = new_val;
    switch (way) {
        case i7_lvalue_PREDEC:   return_val = old_val-1;   new_val = old_val-1; break;
        case i7_lvalue_POSTDEC:  return_val = old_val; new_val = old_val-1; break;
        case i7_lvalue_PREINC:   return_val = old_val+1;   new_val = old_val+1; break;
        case i7_lvalue_POSTINC:  return_val = old_val; new_val = old_val+1; break;
        case i7_lvalue_SETBIT:   new_val = old_val | new_val; return_val = new_val; break;
        case i7_lvalue_CLEARBIT: new_val = old_val &(~new_val); return_val = new_val; break;
    }
    /* Unpack into four bytes, most significant first. */
    data[byte_position]   = I7BYTE_0(new_val);
    data[byte_position+1] = I7BYTE_1(new_val);
    data[byte_position+2] = I7BYTE_2(new_val);
    data[byte_position+3] = I7BYTE_3(new_val);
    return return_val;
}

§8. "Short" 16-bit numbers can also be accessed:

void glulx_aloads(i7process_t *proc, i7word_t x, i7word_t y, i7word_t *z);
/* Reads the 16-bit number stored as entry |y| of the array of shorts at byte
   address |x|, high byte first, and stores it in |*z|. Does nothing if |z| is
   null. */
void glulx_aloads(i7process_t *proc, i7word_t x, i7word_t y, i7word_t *z) {
    if (z == NULL) return;
    i7word_t high = (i7word_t) i7_read_byte(proc, x+2*y);
    i7word_t low = (i7word_t) i7_read_byte(proc, x+2*y+1);
    *z = 0x100*high + low;
}

§9. A Glulx assembly opcode is provided for fast memory copies:

void glulx_mcopy(i7process_t *proc, i7word_t x, i7word_t y, i7word_t z);
void glulx_malloc(i7process_t *proc, i7word_t x, i7word_t y);
void glulx_mfree(i7process_t *proc, i7word_t x);
/* Copies |x| bytes from address |y| to address |z|. The direction of copying
   is chosen so that overlapping ranges are handled correctly: forwards when
   the destination lies below the source, backwards otherwise. */
void glulx_mcopy(i7process_t *proc, i7word_t x, i7word_t y, i7word_t z) {
    if (z < y) {
        i7word_t offset = 0;
        while (offset < x) {
            i7_write_byte(proc, z+offset, i7_read_byte(proc, y+offset));
            offset++;
        }
    } else {
        i7word_t remaining = x;
        while (remaining > 0) {
            remaining--;
            i7_write_byte(proc, z+remaining, i7_read_byte(proc, y+remaining));
        }
    }
}

/* Not implemented: prints a message saying so and halts the process via
   i7_fatal_exit. The arguments |x| and |y| are ignored. */
void glulx_malloc(i7process_t *proc, i7word_t x, i7word_t y) {
    fputs("Unimplemented: glulx_malloc.\n", stdout);
    i7_fatal_exit(proc);
}

/* Not implemented: prints a message saying so and halts the process via
   i7_fatal_exit. The argument |x| is ignored. */
void glulx_mfree(i7process_t *proc, i7word_t x) {
    fputs("Unimplemented: glulx_mfree.\n", stdout);
    i7_fatal_exit(proc);
}

§10. Populating memory with arrays. Inter supports four sorts of arrays, with behaviour as laid out in this 2x2 grid:

             | entries count 0, 1, 2,...     | entry 0 is N, then entries count 1, 2, ..., N
-------------+-------------------------------+-----------------------------------------------
byte entries | BYTE_ARRAY_FORMAT             | BUFFER_ARRAY_FORMAT
-------------+-------------------------------+-----------------------------------------------
word entries | WORD_ARRAY_FORMAT             | TABLE_ARRAY_FORMAT
-------------+-------------------------------+-----------------------------------------------
/* Called when compilation of the array |array_name| begins. Remembers the
   name and resets the entry count. An array whose symbol |array_s| carries
   the VERBARRAY_IANN annotation with value 1 is handed to
   CLiteralsModel::verb_grammar instead, and FALSE is returned. Otherwise a
   constant for the array's address is defined, an extent entry is placed
   first for the table and buffer formats, and TRUE is returned. */
int CMemoryModel::begin_array(code_generator *cgt, code_generation *gen,
    text_stream *array_name, inter_symbol *array_s, inter_tree_node *P, int format) {
    /* Keep a copy of the name: end_array will need it to define the extent. */
    Str::clear(C_GEN_DATA(memdata.array_name));
    WRITE_TO(C_GEN_DATA(memdata.array_name), "%S", array_name);
    C_GEN_DATA(memdata.entry_count) = 0;

    if ((array_s) && (Inter::Symbols::read_annotation(array_s, VERBARRAY_IANN) == 1)) {
        CLiteralsModel::verb_grammar(cgt, gen, array_s, P);
        return FALSE;
    }

    /* The format name is used only in the comment written beside the constant. */
    text_stream *format_name = I"unknown";
    Work out the format name10.1;
    Define a constant for the byte address in memory where the array begins10.2;
    if ((format == TABLE_ARRAY_FORMAT) || (format == BUFFER_ARRAY_FORMAT))
        Place the extent entry N at index 010.3;
    return TRUE;
}

§10.1. Work out the format name10.1 =

    switch (format) {
        case BYTE_ARRAY_FORMAT: format_name = I"byte"; break;
        case WORD_ARRAY_FORMAT: format_name = I"word"; break;
        case BUFFER_ARRAY_FORMAT: format_name = I"buffer"; break;
        case TABLE_ARRAY_FORMAT: format_name = I"table"; break;
    }

§10.2. Crucially, the array names are #define constants declared up at the top of the source code: they are not variables with pointer types, or something like that. This means they can legally be used as values elsewhere in memory, or as initial values of variables, and so on.

Object, class and function names can also legally appear as array entries, because they too are defined constants, equal to their IDs: see C Object Model.

Define a constant for the byte address in memory where the array begins10.2 =

    generated_segment *saved = CodeGen::select(gen, c_predeclarations_I7CGS);
    text_stream *OUT = CodeGen::current(gen);
    WRITE("#define ");
    CNamespace::mangle(cgt, OUT, array_name);
    WRITE(" %d /* = position in memory of %S array %S */\n",
        C_GEN_DATA(memdata.himem), format_name, array_name);
    CodeGen::deselect(gen, saved);

§10.3. Of course, right now we don't know N, the extent of the array. So we will refer to this with a constant like xt_myarray, which we will retrospectively predefine when the array ends.

Place the extent entry N at index 010.3 =

    /* Build the name of the extent constant, "xt_" followed by the mangled
       array name, and place it as an ordinary entry of the array. */
    TEMPORARY_TEXT(extent_constant)
    WRITE_TO(extent_constant, "xt_");
    CNamespace::mangle(cgt, extent_constant, array_name);
    CMemoryModel::array_entry(cgt, gen, extent_constant, format);
    DISCARD_TEXT(extent_constant)

§11. The call to CMemoryModel::begin_array is then followed by a series of calls to:

/* Places one entry, whose value is the text |entry|, at the current end of
   memory, and counts it. Table and word arrays take four-byte word entries;
   all other formats take single-byte entries. */
void CMemoryModel::array_entry(code_generator *cgt, code_generation *gen,
    text_stream *entry, int format) {
    int is_word_format = FALSE;
    if ((format == TABLE_ARRAY_FORMAT) || (format == WORD_ARRAY_FORMAT))
        is_word_format = TRUE;

    generated_segment *saved = CodeGen::select(gen, c_mem_I7CGS);
    text_stream *OUT = CodeGen::current(gen);
    if (is_word_format)
        This is a word entry11.2
    else
        This is a byte entry11.1;
    CodeGen::deselect(gen, saved);

    C_GEN_DATA(memdata.entry_count)++;
}

§11.1. This is a byte entry11.1 =

    WRITE("    (i7byte_t) %S, /* %d */\n", entry, C_GEN_DATA(memdata.himem));
    C_GEN_DATA(memdata.himem) += 1;

§11.2. Now we see why it was important for I7BYTE_0 and so on to be macros: they use only arithmetic operations which can be constant-folded by the C compiler, and therefore if X is a valid constant-context expression in C then so is I7BYTE_0(X).

This is a word entry11.2 =

    WRITE("    I7BYTE_0(%S), I7BYTE_1(%S), I7BYTE_2(%S), I7BYTE_3(%S), /* %d */\n",
        entry, entry, entry, entry, C_GEN_DATA(memdata.himem));
    C_GEN_DATA(memdata.himem) += 4;

§12.

/* Writes the mangled form of the name of the symbol |aliased| to the stream
   currently selected in |gen|. */
void CMemoryModel::compile_literal_symbol(code_generator *cgt, code_generation *gen, inter_symbol *aliased) {
    text_stream *destination = CodeGen::current(gen);
    text_stream *identifier = Inter::Symbols::name(aliased);
    Generators::mangle(gen, destination, identifier);
}

§13. Alternatively, we can just specify how many entries there will be: they will then be initialised to 0.

/* Places |how_many| entries, each with the value 0, in the array being
   compiled. If |plus_ips| is set, 64 further entries are added. */
void CMemoryModel::array_entries(code_generator *cgt, code_generation *gen,
    int how_many, int plus_ips, int format) {
    int total = how_many;
    if (plus_ips) total += 64;
    for (int remaining = total; remaining > 0; remaining--)
        CMemoryModel::array_entry(cgt, gen, I"0", format);
}

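§14. When all the entries have been placed, the following is called. It does nothing except to predeclare the extent constant xt_myarray, as one less than the number of entries placed. Note that this constant is defined for every array, whether or not an extent entry was placed at index 0; only table and buffer arrays actually refer to it.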

/* Called when the last entry of an array has been placed. Defines the
   constant "xt_" followed by the mangled array name as one less than the
   number of entries placed. */
void CMemoryModel::end_array(code_generator *cgt, code_generation *gen, int format) {
    int extent = C_GEN_DATA(memdata.entry_count) - 1;

    generated_segment *previous = CodeGen::select(gen, c_predeclarations_I7CGS);
    text_stream *OUT = CodeGen::current(gen);
    WRITE("#define xt_");
    CNamespace::mangle(cgt, OUT, C_GEN_DATA(memdata.array_name));
    WRITE(" %d\n", extent);
    CodeGen::deselect(gen, previous);
}

§15. Primitives for byte and word lookup. The signatures here are:

primitive !lookup val val -> val
primitive !lookupbyte val val -> val
/* Returns TRUE if the node |ref| is a reference to a use of either of the two
   lookup primitives, LOOKUP_BIP or LOOKUPBYTE_BIP, and FALSE otherwise. */
int CMemoryModel::handle_store_by_ref(code_generation *gen, inter_tree_node *ref) {
    if ((Inter::Reference::node_is_ref_to(gen->from, ref, LOOKUP_BIP)) ||
        (Inter::Reference::node_is_ref_to(gen->from, ref, LOOKUPBYTE_BIP)))
        return TRUE;
    return FALSE;
}

/* Compiles a use of one of the two lookup primitives, in its reference or its
   value form according to CReferences::am_I_a_ref, and returns FALSE. For any
   other primitive, compiles nothing and returns NOT_APPLICABLE. */
int CMemoryModel::invoke_primitive(code_generation *gen, inter_ti bip, inter_tree_node *P) {
    text_stream *OUT = CodeGen::current(gen);
    if (bip == LOOKUP_BIP) {
        if (CReferences::am_I_a_ref(gen)) Word value as reference15.2
        else Word value as value15.1;
    } else if (bip == LOOKUPBYTE_BIP) {
        if (CReferences::am_I_a_ref(gen)) Byte value as reference15.4
        else Byte value as value15.3;
    } else {
        return NOT_APPLICABLE;
    }
    return FALSE;
}

§15.1. Word value as value15.1 =

    /* A complete call to i7_read_word, with whatever VNODE_1C and VNODE_2C
       write as its second and third arguments. */
    WRITE("i7_read_word(proc, ");
    VNODE_1C;
    WRITE(", ");
    VNODE_2C;
    WRITE(")");

§15.2. Word value as reference15.2 =

    /* The opening of a call to i7_write_word. The call is left unfinished
       here: nothing after the third argument's comma is written by this
       paragraph. */
    WRITE("i7_write_word(proc, ");
    VNODE_1C;
    WRITE(", ");
    VNODE_2C;
    WRITE(", ");

§15.3. Byte value as value15.3 =

    /* A complete call to i7_read_byte, whose address argument is the sum of
       whatever VNODE_1C and VNODE_2C write. */
    WRITE("i7_read_byte(proc, ");
    VNODE_1C;
    WRITE(" + ");
    VNODE_2C;
    WRITE(")");

§15.4. Byte value as reference15.4 =

    /* The opening of a call to i7_change_byte, whose address argument is the
       sum of whatever VNODE_1C and VNODE_2C write. The call is left
       unfinished here: nothing after the address's comma is written by this
       paragraph. */
    WRITE("i7_change_byte(proc, ");
    VNODE_1C;
    WRITE(" + ");
    VNODE_2C;
    WRITE(", ");