[InterConstruct::] Inter Constructs.

There are around two dozen constructs in textual Inter source code, with each
instruction in bytecode being a usage of one of them.

@ Each different construct is represented by an instance of the following:

=
typedef struct inter_construct {
	inter_ti construct_ID; /* used to identify this in bytecode */
	struct text_stream *construct_name;

	wchar_t recognition_regexp[MAX_RECOGNITION_REGEXP_LENGTH];
	struct text_stream *syntax;

	int min_level; /* min node tree depth within its package */
	int max_level; /* max node tree depth within its package */
	int usage_permissions; /* a bitmap of the |*_ICUP| values */

	struct method_set *methods; /* what it does is entirely specified by these */

	CLASS_DEFINITION
} inter_construct;

/* Creates the construct with the given |ID| and (copied) |name|, and registers
   it in the ID lookup table. The new construct has no textual syntax, is
   allowed only at level 0 of a package, and has no usage permissions: the
   caller grants those it wants. */
inter_construct *InterConstruct::create_construct(inter_ti ID, text_stream *name) {
	inter_construct *IC = CREATE(inter_construct);
	IC->construct_ID = ID;
	IC->construct_name = Str::duplicate(name);

	IC->recognition_regexp[0] = 0; /* empty regexp: never matched against a line */
	IC->syntax = NULL; /* remains so unless a syntax is later specified */

	IC->min_level = 0;
	IC->max_level = 0;
	IC->usage_permissions = 0; /* nothing is permitted until explicitly granted */

	IC->methods = Methods::new_set();
	InterConstruct::set_construct_for_ID(ID, IC);
	return IC;
}

@ Several fields specify restrictions on where, in an Inter tree, instructions
using this construct can appear. |min_level| to |max_level|, inclusive, give the
range of hierarchical levels within their packages which such instructions can
occur at. By default, note that a construct can only be used at the top level
of a package -- min and max both equal 0; and by default, it has no usage
permissions at all. Those must be explicitly granted when a new construct is
created.
@d INFINITELY_DEEP 100000000 @d OUTSIDE_OF_PACKAGES_ICUP 1 @d INSIDE_PLAIN_PACKAGE_ICUP 2 @d INSIDE_CODE_PACKAGE_ICUP 4 @d CAN_HAVE_CHILDREN_ICUP 8 = void InterConstruct::permit(inter_construct *IC, int usage) { IC->usage_permissions |= usage; } void InterConstruct::allow_in_depth_range(inter_construct *IC, int l1, int l2) { IC->min_level = l1; IC->max_level = l2; } @ So here is the code to police those restrictions. First, for a node already in position: = inter_error_message *InterConstruct::check_permissions(inter_construct *IC, inter_package *pack, inter_error_location *eloc) { int need = INSIDE_PLAIN_PACKAGE_ICUP; if (pack == NULL) need = OUTSIDE_OF_PACKAGES_ICUP; else if (InterPackage::is_a_function_body(pack)) need = INSIDE_CODE_PACKAGE_ICUP; if ((IC->usage_permissions & need) != need) { text_stream *M = Str::new(); WRITE_TO(M, "construct '%S' cannot be used ", IC->construct_name); switch (need) { case OUTSIDE_OF_PACKAGES_ICUP: WRITE_TO(M, "outside packages"); break; case INSIDE_PLAIN_PACKAGE_ICUP: WRITE_TO(M, "inside non-code package '%S'", InterPackage::name(pack)); break; case INSIDE_CODE_PACKAGE_ICUP: WRITE_TO(M, "inside code package '%S'", InterPackage::name(pack)); break; } return Inter::Errors::plain(M, eloc); } return NULL; } @ Second, for a proposed use of node not yet in position -- this is used when reading textual inter, hence the message about indentation: = inter_error_message *InterConstruct::check_level_in_package(inter_bookmark *IBM, inter_ti ID, int level, inter_error_location *eloc) { inter_construct *proposed = InterConstruct::get_construct_for_ID(ID); if (proposed == NULL) return Inter::Errors::plain(I"no such construct", eloc); inter_package *pack = InterBookmark::package(IBM); int actual = level; if ((pack) && (InterPackage::is_a_root_package(pack) == FALSE)) actual = level - InterBookmark::baseline(IBM) - 1; if (actual < 0) return Inter::Errors::plain(I"impossible level", eloc); if ((actual < proposed->min_level) || (actual > 
proposed->max_level)) return Inter::Errors::plain(I"indentation error", eloc); return InterConstruct::check_permissions(proposed, pack, eloc); } @ A much more formidable check. This traverses an entire tree, and verifies that every construct is legally used: = typedef struct tree_lint_state { struct inter_package *package; inter_ti package_level; } tree_lint_state; void InterConstruct::tree_lint(inter_tree *I) { tree_lint_state tls; tls.package = I->root_package; tls.package_level = 0; InterConstruct::tree_lint_r(I, I->root_node, &tls); } void InterConstruct::tree_lint_r(inter_tree *I, inter_tree_node *P, tree_lint_state *tls) { LOOP_THROUGH_INTER_CHILDREN(C, P) { if (Inode::get_package(C) != tls->package) { WRITE_TO(STDERR, "Node gives package as "); InterPackage::write_URL(STDERR, Inode::get_package(C)); WRITE_TO(STDERR, " but it is actually in "); InterPackage::write_URL(STDERR, tls->package); WRITE_TO(STDERR, "\n"); internal_error("node in wrong package"); } inter_construct *IC = NULL; inter_error_message *E = InterConstruct::get_construct(C, &IC); if (E) Inter::Errors::issue(E); if (IC) { inter_error_location *eloc = Inode::get_error_location(C); E = InterConstruct::check_permissions(IC, tls->package, eloc); if (E) Inter::Errors::issue(E); E = InterConstruct::verify_children(C); if (E) { Inter::Errors::issue(E); Inter::Errors::backtrace(STDERR, C); } inter_ti level = C->W.instruction[LEVEL_IFLD]; inter_ti level_in_package = level; if (tls->package) level_in_package -= tls->package_level; if ((IC->construct_ID != PACKAGE_IST) && (((int) level_in_package < IC->min_level) || ((int) level_in_package > IC->max_level))) { text_stream *M = Str::new(); WRITE_TO(M, "construct '%S' used at level %d in its package, not %d to %d", IC->construct_name, level_in_package, IC->min_level, IC->max_level); Inter::Errors::issue(Inter::Errors::plain(M, eloc)); } if (C->W.instruction[ID_IFLD] == PACKAGE_IST) { tree_lint_state inner_tls; inner_tls.package = 
InterPackage::at_this_head(C); inner_tls.package_level = level + 1; InterConstruct::tree_lint_r(I, C, &inner_tls); LOOP_OVER_SYMBOLS_TABLE(S, InterPackage::scope(inner_tls.package)) if ((InterSymbol::get_flag(S, SPECULATIVE_ISYMF)) && (InterSymbol::is_defined(S) == FALSE)) { Inter::Errors::issue(Inter::Errors::quoted( I"symbol undefined in package", InterSymbol::identifier(S), eloc)); } } else { InterConstruct::tree_lint_r(I, C, tls); } } } } @ So much for a construct's invariants. Now we turn to the textual syntax for it, which of course applies only to the textual form of Inter. Moreover, the syntax fields of an //inter_construct// are used only for parsing, and not for printing instructions out again; it's just not worth the bother of doing it that way, elegant as it might be. So note that if a syntax changes, the corresponding function to write an instruction must change too. So: |syntax| specifies the textual format of the construct for parsing purposes. It needs to be set up so that no two different constructs can match the same line of text. The |syntax| is easier to read than a regular expression, which is what we turn it into. So for example |deploy !IDENTIFIER| would match the literal word |deploy|, then any amount of white space, then a literal |!| and immediately following it an identifier. Note that it is legal not to call this function, i.e., to create a construct but give it no syntax. If so, it will be inexpressible in textual Inter code. 
@d MAX_RECOGNITION_REGEXP_LENGTH 64 = void InterConstruct::specify_syntax(inter_construct *IC, text_stream *syntax) { IC->syntax = syntax; TEMPORARY_TEXT(regexp) for (int i = 0; i < Str::len(syntax); i++) { if (Str::includes_wide_string_at(syntax, L"OPTIONALIDENTIFIER", i)) { i += 17; WRITE_TO(regexp, "*(%%i*)"); } else if (Str::includes_wide_string_at(syntax, L"WHITESPACE", i)) { i += 9; WRITE_TO(regexp, " *"); } else if (Str::includes_wide_string_at(syntax, L"IDENTIFIER", i)) { i += 9; WRITE_TO(regexp, "(%%C+)"); } else if (Str::includes_wide_string_at(syntax, L"_IDENTIFIER", i)) { i += 10; WRITE_TO(regexp, "(_%%i+)"); } else if (Str::includes_wide_string_at(syntax, L".IDENTIFIER", i)) { i += 10; WRITE_TO(regexp, "(.%%i+)"); } else if (Str::includes_wide_string_at(syntax, L"!IDENTIFIER", i)) { i += 10; WRITE_TO(regexp, "(!%%i+)"); } else if (Str::includes_wide_string_at(syntax, L"IDENTIFIER", i)) { i += 9; WRITE_TO(regexp, "(%%i+)"); } else if (Str::includes_wide_string_at(syntax, L"NUMBER", i)) { i += 5; WRITE_TO(regexp, "(%%d+)"); } else if (Str::includes_wide_string_at(syntax, L"TOKENS", i)) { i += 5; WRITE_TO(regexp, "(%%c+)"); } else if (Str::includes_wide_string_at(syntax, L"TOKEN", i)) { i += 4; WRITE_TO(regexp, "(%%C+)"); } else if (Str::includes_wide_string_at(syntax, L"TEXT", i)) { i += 3; WRITE_TO(regexp, "\"(%%c*)\""); } else if (Str::includes_wide_string_at(syntax, L"ANY", i)) { i += 2; WRITE_TO(regexp, "(%%c*)"); } else { wchar_t c = Str::get_at(syntax, i); if (c == '\'') c = '"'; PUT_TO(regexp, c); } } if (Str::len(regexp) >= MAX_RECOGNITION_REGEXP_LENGTH - 1) internal_error("too much syntax"); int j = 0; LOOP_THROUGH_TEXT(pos, regexp) IC->recognition_regexp[j++] = Str::get(pos); IC->recognition_regexp[j++] = 0; DISCARD_TEXT(regexp) } @ There isn't really a construct with ID 0: this is used only as a sort of "not a legal construct" value. Notice the way we give it no syntax, grant it no permissions, and allow it only in an impossible range. 
So this cannot be expressed in textual Inter, and cannot be stored in bytecode
binary Inter either.

@e INVALID_IST from 0

=
void InterConstruct::define_invalid_construct(void) {
	inter_construct *IC = InterConstruct::create_construct(INVALID_IST, I"invalid");
	InterConstruct::allow_in_depth_range(IC, 0, -1); /* min exceeds max: no level is legal */
}

@ The valid construct IDs then count upwards from there. Since these IDs are
stored in the bytecode for an instruction, in fact in the 0th word of the frame,
we will need to convert them to their //inter_construct// equivalents quickly.
So we store a lookup table:

@d MAX_INTER_CONSTRUCTS 100

=
int inter_construct_by_ID_ready = FALSE;
inter_construct *inter_construct_by_ID[MAX_INTER_CONSTRUCTS];

/* Records |IC| as the construct with the given |ID|. The table is cleared on
   first use; an |ID| too large for the table is an internal error.
   NOTE(review): the table-clearing loop and the bounds test were damaged in
   this copy of the source and have been reconstructed -- confirm against the
   original. */
void InterConstruct::set_construct_for_ID(inter_ti ID, inter_construct *IC) {
	if (inter_construct_by_ID_ready == FALSE) {
		inter_construct_by_ID_ready = TRUE;
		for (int i=0; i<MAX_INTER_CONSTRUCTS; i++) inter_construct_by_ID[i] = NULL;
	}
	if (ID >= MAX_INTER_CONSTRUCTS) internal_error("too many constructs");
	inter_construct_by_ID[ID] = IC;
}

/* Returns the construct with this |ID|, or |NULL| if the ID is |INVALID_IST|,
   is out of range, or if no construct has been registered yet. */
inter_construct *InterConstruct::get_construct_for_ID(inter_ti ID) {
	if ((ID == INVALID_IST) ||
		(ID >= MAX_INTER_CONSTRUCTS) ||
		(inter_construct_by_ID_ready == FALSE))
		return NULL;
	return inter_construct_by_ID[ID];
}

@ Whence, in a faintly paranoid way:

=
/* Finds the construct used by the instruction at |P|. On success, returns
   |NULL| and, if |to| is not |NULL|, stores the construct in |*to|; otherwise
   returns an error and leaves |*to| alone. */
inter_error_message *InterConstruct::get_construct(inter_tree_node *P, inter_construct **to) {
	if (P == NULL) return Inode::error(P, I"invalid node", NULL);
	inter_construct *IC = InterConstruct::get_construct_for_ID(P->W.instruction[ID_IFLD]);
	if (IC == NULL) return Inode::error(P, I"no such construct", NULL);
	if (to) *to = IC;
	return NULL;
}

@ Each construct is managed by its own section of code, and that includes the
creation of the constructs: so we poll those sections in turn.
= void InterConstruct::create_language(void) { SymbolAnnotation::declare_canonical_annotations(); InterConstruct::define_invalid_construct(); Inter::Nop::define(); Inter::Comment::define(); Inter::Plug::define(); Inter::Socket::define(); Inter::Version::define(); Inter::Pragma::define(); Inter::Link::define(); Inter::Append::define(); Inter::Kind::define(); Inter::DefaultValue::define(); Inter::Constant::define(); Inter::Instance::define(); Inter::Variable::define(); Inter::Property::define(); Inter::Permission::define(); Inter::PropertyValue::define(); Inter::Primitive::define(); InterPackage::define(); Inter::PackageType::define(); Inter::Label::define(); Inter::Local::define(); Inter::Inv::define(); Inter::Ref::define(); Inter::Val::define(); Inter::Lab::define(); Inter::Assembly::define(); Inter::Code::define(); Inter::Evaluation::define(); Inter::Reference::define(); Inter::Cast::define(); Inter::Splat::define(); } @ The result is printed when //inter// is run with the |-constructs| switch. = void InterConstruct::show_constructs(OUTPUT_STREAM) { WRITE(" Code Construct Syntax\n"); for (int ID=0; IDconstruct_name); for (int j = Str::len(IC->construct_name); j<20; j++) PUT(' '); WRITE("%S\n", IC->syntax); } } } @ Okay then! We have our constructs: what shall we do with them? The answer is that each construct behaves differently, in ways specified by the following method calls on the relevant //inter_construct//. Firstly, each construct has a method for verifying (i) that it is being used in a self-consistent way by the given instruction, and (ii) that it can see child nodes to that instruction of a kind it expects. 
@e CONSTRUCT_VERIFY_MTID @e CONSTRUCT_VERIFY_CHILDREN_MTID = VOID_METHOD_TYPE(CONSTRUCT_VERIFY_MTID, inter_construct *IC, inter_tree_node *P, inter_package *owner, inter_error_message **E) VOID_METHOD_TYPE(CONSTRUCT_VERIFY_CHILDREN_MTID, inter_construct *IC, inter_tree_node *P, inter_error_message **E) inter_error_message *InterConstruct::verify_construct(inter_package *owner, inter_tree_node *P) { inter_construct *IC = NULL; inter_error_message *E = InterConstruct::get_construct(P, &IC); if (E) return E; VOID_METHOD_CALL(IC, CONSTRUCT_VERIFY_MTID, P, owner, &E); return E; } inter_error_message *InterConstruct::verify_children(inter_tree_node *P) { inter_construct *IC = NULL; inter_error_message *E = InterConstruct::get_construct(P, &IC); if (E) return E; VOID_METHOD_CALL(IC, CONSTRUCT_VERIFY_CHILDREN_MTID, P, &E); return E; } @ This method writes out an instruction in textual Inter format, and this is handled differently by each construct. @e CONSTRUCT_WRITE_MTID = VOID_METHOD_TYPE(CONSTRUCT_WRITE_MTID, inter_construct *IC, text_stream *OUT, inter_tree_node *P, inter_error_message **E) inter_error_message *InterConstruct::write_construct_text(OUTPUT_STREAM, inter_tree_node *P) { if (P->W.instruction[ID_IFLD] == NOP_IST) return NULL; return InterConstruct::write_construct_text_allowing_nop(OUT, P); } inter_error_message *InterConstruct::write_construct_text_allowing_nop(OUTPUT_STREAM, inter_tree_node *P) { inter_construct *IC = NULL; inter_error_message *E = InterConstruct::get_construct(P, &IC); if (E) return E; for (inter_ti L=0; LW.instruction[LEVEL_IFLD]; L++) WRITE("\t"); VOID_METHOD_CALL(IC, CONSTRUCT_WRITE_MTID, OUT, P, &E); WRITE("\n"); if (P->W.instruction[ID_IFLD] == PACKAGE_IST) InterPackage::write_symbols(OUT, P); return E; } @ Conversely, the function //InterConstruct::match// takes a line of textual Inter source code, uses the regular expressions for each construct to find which one is being used, and then calls its |CONSTRUCT_READ_MTID| method to ask 
for the job to be completed. @e CONSTRUCT_READ_MTID = VOID_METHOD_TYPE(CONSTRUCT_READ_MTID, inter_construct *IC, inter_bookmark *, inter_line_parse *, inter_error_location *, inter_error_message **E) inter_error_message *InterConstruct::match(inter_line_parse *ilp, inter_error_location *eloc, inter_bookmark *IBM) { inter_construct *IC; LOOP_OVER(IC, inter_construct) if (IC->recognition_regexp[0]) if (Regexp::match(&ilp->mr, ilp->line, IC->recognition_regexp)) { inter_error_message *E = NULL; VOID_METHOD_CALL(IC, CONSTRUCT_READ_MTID, IBM, ilp, eloc, &E); return E; } return Inter::Errors::plain(I"bad inter line", eloc); } @ Transposition is an awkward necessity when binary Inter is read in from a file, and some references in its instruction bytecode need to be modified: this is not the place to explain it. See //Inter in Binary Files//. @e CONSTRUCT_TRANSPOSE_MTID = VOID_METHOD_TYPE(CONSTRUCT_TRANSPOSE_MTID, inter_construct *IC, inter_tree_node *P, inter_ti *grid, inter_ti max, inter_error_message **E) inter_error_message *InterConstruct::transpose_construct(inter_package *owner, inter_tree_node *P, inter_ti *grid, inter_ti max) { inter_construct *IC = NULL; inter_error_message *E = InterConstruct::get_construct(P, &IC); if (E) return E; VOID_METHOD_CALL(IC, CONSTRUCT_TRANSPOSE_MTID, P, grid, max, &E); return E; }