From c772f426e16f410aa627b63c881e94639ea9863f Mon Sep 17 00:00:00 2001 From: Graham Nelson Date: Thu, 25 Nov 2021 21:53:47 +0000 Subject: [PATCH] Rewrite of parsing stages --- README.md | 2 +- build.txt | 4 +- docs/pipeline-module/2-pe.html | 2 +- docs/pipeline-module/3-ps.html | 477 +++++++++++------- inform7/Figures/memory-diagnostics.txt | 6 +- inform7/Figures/timings-diagnostics.txt | 26 +- .../Chapter 3/Parsing Stages.w | 351 ++++++++----- 7 files changed, 534 insertions(+), 334 deletions(-) diff --git a/README.md b/README.md index 689e1296b..dac7afb2f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Inform 7 -v10.1.0-alpha.1+6T82 'Krypton' (24 November 2021) +v10.1.0-alpha.1+6T83 'Krypton' (25 November 2021) ## About Inform 7 diff --git a/build.txt b/build.txt index d44f7d93f..77897e509 100644 --- a/build.txt +++ b/build.txt @@ -1,3 +1,3 @@ Prerelease: alpha.1 -Build Date: 24 November 2021 -Build Number: 6T82 +Build Date: 25 November 2021 +Build Number: 6T83 diff --git a/docs/pipeline-module/2-pe.html b/docs/pipeline-module/2-pe.html index d5f64bede..cd6907366 100644 --- a/docs/pipeline-module/2-pe.html +++ b/docs/pipeline-module/2-pe.html @@ -226,7 +226,7 @@ what the red button marked "danger" does.

-void PipelineErrors::kit_error(char *message, text_stream *quote) {
+void PipelineErrors::kit_error(char *message, text_stream *quote) {
     #ifdef PROBLEMS_MODULE
     TEMPORARY_TEXT(M)
     WRITE_TO(M, message, quote);
diff --git a/docs/pipeline-module/3-ps.html b/docs/pipeline-module/3-ps.html
index 50ca84a8b..6f7fa232c 100644
--- a/docs/pipeline-module/3-ps.html
+++ b/docs/pipeline-module/3-ps.html
@@ -73,10 +73,20 @@ function togglePopup(material_id) {
     
 

Two stages which accept raw I6-syntax material in the parse tree, either from insertions made using Inform 7's low-level features, or after reading the source code for a kit.

-
+
-

§1. The two stages.

+

§1. The two stages. These stages have more in common than they first appear. Both convert I6T-syntax +source code into a series of SPLAT_IST nodes in the Inter tree, with one +such node for each different directive in the I6T source. +

+

The T in "I6T" stands for "template", which in the 2010s was a mechanism for +providing I6 code to I7. That's not the arrangement any more, but the syntax +

+ +
 void ParsingStages::create_pipeline_stage(void) {
     ParsingPipelines::new_stage(I"load-kit-source", ParsingStages::run_load_kit_source,
@@ -86,9 +96,8 @@ function togglePopup(material_id) {
 }
 

§2. The stage load-kit-source K takes the kit K, looks for its source code -(which will be Inform 6-syntax source code written in a literate programming -notation) and reads this in to the current Inter tree, as a new top-level -module. +(text files written in I6T syntax) and reads this in to the current Inter tree, +placing the resulting nodes in a new top-level module.

@@ -98,13 +107,13 @@ module.
     if (main_package) Create a module to hold the Inter read in from this kit2.1;
     I6T_kit kit;
     Make a suitable I6T kit2.3;
-    ParsingStages::capture(&kit, NULL, I"all");
+    ParsingStages::I6T_reader(&kit, NULL, I"all");
     return TRUE;
 }
 

§2.1. So for example if we are reading the source for WorldModelKit, then the following creates the package /main/WorldModelKit, with package type _module. -It's into this module that all the code will be read. +It's into this module that the resulting SPLAT_IST nodes will be put.

Create a module to hold the Inter read in from this kit2.1 = @@ -119,122 +128,90 @@ It's into this module that all the code will be read. Site::set_assimilation_package(I, template_p);

-

§2.2.

+

§2.2. The stage parse-insertions does the same thing, but on a much smaller scale, +and reading raw I6T source code from LINK_IST nodes in the Inter tree rather +than from an external file. There will only be a few of these, and with not much +code in them, when the tree has been compiled by Inform: they arise from +features such as +

+ +
+Include (-
+    [ CuriousFunction;
+        print "Curious!";
+    ];
+-).
+
+

The inform7 code does not contain a compiler from I6T down to Inter, so +it can only leave us these unparsed fragments as LINK_IST nodes. We take +it from there. +

 int ParsingStages::run_parse_insertions(pipeline_step *step) {
     inter_tree *I = step->ephemera.repository;
     I6T_kit kit;
     Make a suitable I6T kit2.3;
-    InterTree::traverse(I, ParsingStages::catch_all_visitor, &kit, NULL, 0);
+    InterTree::traverse(I, ParsingStages::visit_insertions, &kit, NULL, LINK_IST);
     return TRUE;
 }
 
-void ParsingStages::catch_all_visitor(inter_tree *I, inter_tree_node *P, void *state) {
-    if (P->W.data[ID_IFLD] == LINK_IST) {
-        text_stream *insertion = Inode::ID_to_text(P, P->W.data[TO_RAW_LINK_IFLD]);
-        #ifdef CORE_MODULE
-        current_sentence = (parse_node *) Inode::ID_to_ref(P, P->W.data[REF_LINK_IFLD]);
-        #endif
-        I6T_kit *kit = (I6T_kit *) state;
-        ParsingStages::capture(kit, insertion, NULL);
-    }
+void ParsingStages::visit_insertions(inter_tree *I, inter_tree_node *P, void *state) {
+    text_stream *insertion = Inode::ID_to_text(P, P->W.data[TO_RAW_LINK_IFLD]);
+    #ifdef CORE_MODULE
+    current_sentence = (parse_node *) Inode::ID_to_ref(P, P->W.data[REF_LINK_IFLD]);
+    #endif
+    I6T_kit *kit = (I6T_kit *) state;
+    ParsingStages::I6T_reader(kit, insertion, NULL);
 }
 
-

§2.3. Make a suitable I6T kit2.3 = +

§2.3. So, then, both of those stages rely on (i) making something called an I6T kit, +then (ii) calling ParsingStages::I6T_reader. +

+ +

Here's where we make the kit, which is really just a collection of settings for +the I6T-reader. That comes down to: +

+ + +

For (c), note that if a kit is in directory K then its source files are +in K/Sections. +

+ +

Make a suitable I6T kit2.3 =

-    linked_list *PP = step->ephemera.the_PP;
-    inter_package *template_package = Site::ensure_assimilation_package(I, RunningPipelines::get_symbol(step, plain_ptype_RPSYM));
-
-    inter_bookmark link_bookmark =
-        Inter::Bookmarks::at_end_of_this_package(template_package);
-
-    kit = ParsingStages::kit_out(&link_bookmark, &(ParsingStages::receive_raw),  &(ParsingStages::receive_command), NULL);
-    kit.no_i6t_file_areas = LinkedLists::len(PP);
+    inter_package *assimilation_package = Site::ensure_assimilation_package(I,
+        RunningPipelines::get_symbol(step, plain_ptype_RPSYM));
+    inter_bookmark assimilation_point =
+        Inter::Bookmarks::at_end_of_this_package(assimilation_package);
+    linked_list *L = NEW_LINKED_LIST(pathname);
     pathname *P;
-    int i=0;
-    LOOP_OVER_LINKED_LIST(P, pathname, PP)
-        kit.i6t_files[i++] = Pathnames::down(P, I"Sections");
+    LOOP_OVER_LINKED_LIST(P, pathname, step->ephemera.the_PP)
+        ADD_TO_LINKED_LIST(Pathnames::down(P, I"Sections"), pathname, L);
+    kit = ParsingStages::kit_out(&assimilation_point,
+        &(ParsingStages::receive_raw), &(ParsingStages::receive_command), L, NULL);
 
-

§3.

+

§3. Once the I6T reader has unpacked the literate-programming notation, it will +reduce the I6T code to pure Inform 6 source together with (perhaps) a handful of +commands in braces. Our kit must say what to do with each of these outputs. +

+ +

The easy part: what to do when we find a command in I6T source. In pre-Inter +versions of Inform, when I6T was just a way of expressing Inform 6 code but +with some braced commands mixed in, there were lots of legal if enigmatic +syntaxes in use. Now those have all gone, so in all cases we issue an error: +

-
define IGNORE_WS_FILTER_BIT 1
-define DQUOTED_FILTER_BIT 2
-define SQUOTED_FILTER_BIT 4
-define COMMENTED_FILTER_BIT 8
-define ROUTINED_FILTER_BIT 16
-define CONTENT_ON_LINE_FILTER_BIT 32
-define SUBORDINATE_FILTER_BITS (COMMENTED_FILTER_BIT + SQUOTED_FILTER_BIT + DQUOTED_FILTER_BIT + ROUTINED_FILTER_BIT)
-
-void ParsingStages::receive_raw(text_stream *S, I6T_kit *kit) {
-    text_stream *R = Str::new();
-    int mode = IGNORE_WS_FILTER_BIT;
-    LOOP_THROUGH_TEXT(pos, S) {
-        wchar_t c = Str::get(pos);
-        if ((c == 10) || (c == 13)) c = '\n';
-        if (mode & IGNORE_WS_FILTER_BIT) {
-            if ((c == '\n') || (Characters::is_whitespace(c))) continue;
-            mode -= IGNORE_WS_FILTER_BIT;
-        }
-        if ((c == '!') && (!(mode & (DQUOTED_FILTER_BIT + SQUOTED_FILTER_BIT)))) {
-            mode = mode | COMMENTED_FILTER_BIT;
-        }
-        if (mode & COMMENTED_FILTER_BIT) {
-            if (c == '\n') {
-                mode -= COMMENTED_FILTER_BIT;
-                if (!(mode & CONTENT_ON_LINE_FILTER_BIT)) continue;
-            }
-            else continue;
-        }
-        if ((c == '[') && (!(mode & SUBORDINATE_FILTER_BITS))) {
-            mode = mode | ROUTINED_FILTER_BIT;
-        }
-        if (mode & ROUTINED_FILTER_BIT) {
-            if ((c == ']') && (!(mode & (DQUOTED_FILTER_BIT + SQUOTED_FILTER_BIT + COMMENTED_FILTER_BIT)))) mode -= ROUTINED_FILTER_BIT;
-        }
-        if ((c == '\'') && (!(mode & (DQUOTED_FILTER_BIT + COMMENTED_FILTER_BIT)))) {
-            if (mode & SQUOTED_FILTER_BIT) mode -= SQUOTED_FILTER_BIT;
-            else mode = mode | SQUOTED_FILTER_BIT;
-        }
-        if ((c == '\"') && (!(mode & (SQUOTED_FILTER_BIT + COMMENTED_FILTER_BIT)))) {
-            if (mode & DQUOTED_FILTER_BIT) mode -= DQUOTED_FILTER_BIT;
-            else mode = mode | DQUOTED_FILTER_BIT;
-        }
-        if (c != '\n') {
-            if (Characters::is_whitespace(c) == FALSE) mode = mode | CONTENT_ON_LINE_FILTER_BIT;
-        } else {
-            if (mode & CONTENT_ON_LINE_FILTER_BIT) mode = mode - CONTENT_ON_LINE_FILTER_BIT;
-            else if (!(mode & SUBORDINATE_FILTER_BITS)) continue;
-        }
-        PUT_TO(R, c);
-        if ((c == ';') && (!(mode & SUBORDINATE_FILTER_BITS))) {
-            ParsingStages::chunked_raw(R, kit);
-            mode = IGNORE_WS_FILTER_BIT;
-        }
-    }
-    ParsingStages::chunked_raw(R, kit);
-    Str::clear(S);
-}
-
-void ParsingStages::chunked_raw(text_stream *S, I6T_kit *kit) {
-    if (Str::len(S) == 0) return;
-    PUT_TO(S, '\n');
-    ParsingStages::entire_splat(kit->IBM, I"template", S, (inter_ti) (Inter::Bookmarks::baseline(kit->IBM) + 1));
-    Str::clear(S);
-}
-
-void ParsingStages::entire_splat(inter_bookmark *IBM, text_stream *origin, text_stream *content, inter_ti level) {
-    inter_ti SID = Inter::Warehouse::create_text(Inter::Bookmarks::warehouse(IBM), Inter::Bookmarks::package(IBM));
-    text_stream *glob_storage = Inter::Warehouse::get_text(Inter::Bookmarks::warehouse(IBM), SID);
-    Str::copy(glob_storage, content);
-    Produce::guard(Inter::Splat::new(IBM, SID, 0, level, 0, NULL));
-}
-
-void ParsingStages::receive_command(OUTPUT_STREAM, text_stream *command, text_stream *argument, I6T_kit *kit) {
+void ParsingStages::receive_command(OUTPUT_STREAM, text_stream *command,
+    text_stream *argument, I6T_kit *kit) {
     if ((Str::eq_wide_string(command, L"plugin")) ||
         (Str::eq_wide_string(command, L"type")) ||
         (Str::eq_wide_string(command, L"open-file")) ||
@@ -259,53 +236,183 @@ It's into this module that all the code will be read.
         (Str::eq_wide_string(command, L"testing-routine")) ||
         (Str::eq_wide_string(command, L"testing-command"))) {
         LOG("command: <%S> argument: <%S>\n", command, argument);
-        PipelineErrors::kit_error("the template command '{-%S}' has been withdrawn in this version of Inform", command);
+        PipelineErrors::kit_error(
+            "the template command '{-%S}' has been withdrawn in this version of Inform",
+            command);
     } else {
         LOG("command: <%S> argument: <%S>\n", command, argument);
         PipelineErrors::kit_error("no such {-command} as '%S'", command);
     }
 }
 
-

§4. I6T kits. These are used to abstract calls to the I6T reader, so that customers of -varying dispositions can do different things with the code parsed. +

§4. We very much do not ignore the raw I6 code read in, though. When the reader +gives us a chunk of this, we parse through it with a simple finite-state machine. +This can be summarised as "divide the code up at ; boundaries, sending each +piece in turn to //ParsingStages::splat//". But of course we do not want to +react to semicolons in quoted text or comments, and in fact we also do not +want to react to semicolons used as statement dividers inside I6 routines (i.e., +functions). So for example

-

§5.

+
+Global aspic = "this; and that";
+! Don't react to this; I'm only a comment
+[ Hello; print "Hello; goodbye.^"; ];
+
+

would be divided into just two splats, +

+ +
+Global aspic = "this; and that";
+
+

and +

+ +
+[ Hello; print "Hello; goodbye.^"; ];
+
+

(And the comment would be stripped out entirely.) +

+ +
define IGNORE_WS_I6TBIT 1
+define DQUOTED_I6TBIT 2
+define SQUOTED_I6TBIT 4
+define COMMENTED_I6TBIT 8
+define ROUTINED_I6TBIT 16
+define CONTENT_ON_LINE_I6TBIT 32
+define SUBORDINATE_I6TBITS
+    (COMMENTED_I6TBIT + SQUOTED_I6TBIT + DQUOTED_I6TBIT + ROUTINED_I6TBIT)
+
+
+void ParsingStages::receive_raw(text_stream *S, I6T_kit *kit) {
+    text_stream *R = Str::new();
+    int mode = IGNORE_WS_I6TBIT;
+    LOOP_THROUGH_TEXT(pos, S) {
+        wchar_t c = Str::get(pos);
+        if ((c == 10) || (c == 13)) c = '\n';
+        if (mode & IGNORE_WS_I6TBIT) {
+            if ((c == '\n') || (Characters::is_whitespace(c))) continue;
+            mode -= IGNORE_WS_I6TBIT;
+        }
+        if ((c == '!') && (!(mode & (DQUOTED_I6TBIT + SQUOTED_I6TBIT)))) {
+            mode = mode | COMMENTED_I6TBIT;
+        }
+        if (mode & COMMENTED_I6TBIT) {
+            if (c == '\n') {
+                mode -= COMMENTED_I6TBIT;
+                if (!(mode & CONTENT_ON_LINE_I6TBIT)) continue;
+            }
+            else continue;
+        }
+        if ((c == '[') && (!(mode & SUBORDINATE_I6TBITS))) {
+            mode = mode | ROUTINED_I6TBIT;
+        }
+        if (mode & ROUTINED_I6TBIT) {
+            if ((c == ']') && (!(mode & (DQUOTED_I6TBIT + SQUOTED_I6TBIT + COMMENTED_I6TBIT))))
+                mode -= ROUTINED_I6TBIT;
+        }
+        if ((c == '\'') && (!(mode & (DQUOTED_I6TBIT + COMMENTED_I6TBIT)))) {
+            if (mode & SQUOTED_I6TBIT) mode -= SQUOTED_I6TBIT;
+            else mode = mode | SQUOTED_I6TBIT;
+        }
+        if ((c == '\"') && (!(mode & (SQUOTED_I6TBIT + COMMENTED_I6TBIT)))) {
+            if (mode & DQUOTED_I6TBIT) mode -= DQUOTED_I6TBIT;
+            else mode = mode | DQUOTED_I6TBIT;
+        }
+        if (c != '\n') {
+            if (Characters::is_whitespace(c) == FALSE)
+                mode = mode | CONTENT_ON_LINE_I6TBIT;
+        } else {
+            if (mode & CONTENT_ON_LINE_I6TBIT) mode = mode - CONTENT_ON_LINE_I6TBIT;
+            else if (!(mode & SUBORDINATE_I6TBITS)) continue;
+        }
+        PUT_TO(R, c);
+        if ((c == ';') && (!(mode & SUBORDINATE_I6TBITS))) {
+            ParsingStages::splat(R, kit);
+            mode = IGNORE_WS_I6TBIT;
+        }
+    }
+    ParsingStages::splat(R, kit);
+    Str::clear(S);
+}
+
+

§5. Each of those "splats" becomes a SPLAT_IST node in the tree at the +current insertion point recorded in the kit. +

+ +

Note that this function empties the splat buffer R before exiting. +

+ +
+void ParsingStages::splat(text_stream *R, I6T_kit *kit) {
+    if (Str::len(R) > 0) {
+        PUT_TO(R, '\n');
+        inter_ti SID = Inter::Warehouse::create_text(
+            Inter::Bookmarks::warehouse(kit->IBM), Inter::Bookmarks::package(kit->IBM));
+        text_stream *textual_storage =
+            Inter::Warehouse::get_text(Inter::Bookmarks::warehouse(kit->IBM), SID);
+        Str::copy(textual_storage, R);
+        Produce::guard(Inter::Splat::new(kit->IBM, SID, 0,
+            (inter_ti) (Inter::Bookmarks::baseline(kit->IBM) + 1), 0, NULL));
+        Str::clear(R);
+    }
+}
+
+

§6. And that's it: the result of these stages is just to break the I6T source they +found up into individual directives, and put them into the tree as SPLAT_IST nodes. +No effort has been made yet to see what directives they are. Subsequent stages +will handle that. +

+ +

§7. The I6T Reader. The rest of this section, then, is a general-purpose reader of I6T-syntax code. +Although it is only used for one purpose in the Inform code base, it once had +multiple uses, and so it's written quite flexibly. There seems no reason to +get rid of that flexibility: perhaps we'll use it again some day. +

+ +

So, then, this is the parcel of settings for controlling the I6T reader: +

 typedef struct I6T_kit {
     struct inter_bookmark *IBM;
-    int no_i6t_file_areas;
-    struct pathname *i6t_files[16];
     void (*raw_callback)(struct text_stream *, struct I6T_kit *);
-    void (*command_callback)(struct text_stream *, struct text_stream *, struct text_stream *, struct I6T_kit *);
+    void (*command_callback)(struct text_stream *, struct text_stream *,
+        struct text_stream *, struct I6T_kit *);
     void *I6T_state;
+    struct linked_list *search_paths;  of pathname
 } I6T_kit;
 
-

§6.

+

§8. We actually don't use this facility, but a kit contains a state which is +shared across the calls to the callback functions. When a kit is created, the +initial state must be supplied; after that, it's updated only by the callback +functions supplied. +

-I6T_kit ParsingStages::kit_out(inter_bookmark *IBM, void (*A)(struct text_stream *, struct I6T_kit *),
-    void (*B)(struct text_stream *, struct text_stream *, struct text_stream *, struct I6T_kit *),
-    void *C) {
+I6T_kit ParsingStages::kit_out(inter_bookmark *IBM,
+    void (*A)(struct text_stream *, struct I6T_kit *),
+    void (*B)(struct text_stream *, struct text_stream *,
+        struct text_stream *, struct I6T_kit *),
+    linked_list *search_list, void *initial_state) {
     I6T_kit kit;
     kit.IBM = IBM;
     kit.raw_callback = A;
     kit.command_callback = B;
-    kit.I6T_state = C;
-    kit.no_i6t_file_areas = 0;
+    kit.I6T_state = initial_state;
+    kit.search_paths = search_list;
     return kit;
 }
 
-

§7. Syntax of I6T files. The syntax of these files has been designed so that a valid I6T file is -also a valid Inweb section file. (Inweb now has two formats, an old and a -new one: here we can read either, though the I6T sources in the main Inform -distribution have been modernised to the new syntax.) Many Inweb syntaxes -are, however, not allowed in I6T: really, you should use only @h headings -and the = sign to divide commentary from text. Macros and definitions, in -particular, are not permitted. This means that no real tangling is required -to make the I6T files. +

§9. I6T files use a literate programming notation which is, in effect, a much +simplified version of Inweb's. (Note that Inweb can therefore read kits as +if they were webs, and we use that to weave them for the source website.) +

+ +

Many Inweb syntaxes are, however, not allowed in I6T: really, you should use +only @h headings and the = sign to divide commentary from text. Macros and +definitions, in particular, are not permitted; I6T is not really tangled as such.

The entire range of possibilities is shown here: @@ -330,7 +437,7 @@ code. While this doesn't allow for full-on literate programming, it does permit a generous amount of annotation.

-

§8. One restriction. It actually doesn't matter if a template file contains +

§10. One restriction. It actually doesn't matter if a template file contains lines longer than this, so long as they do not occur inside {-lines:...} and {-endlines}, and so long as no individual braced command {-...} exceeds this length. @@ -338,7 +445,7 @@ this length.

define MAX_I6T_LINE_LENGTH 1024
 
-

§9. The I6T interpreter is then a single routine to implement the description +

§11. The I6T interpreter is then a single routine to implement the description above, though note that it can act on interventions as well. (But in modern Inform usage, often there won't be any, because templates for the Standard Rules and so forth are assimilated in stand-alone runs of the code generator, @@ -351,25 +458,26 @@ and therefore no interventions will have happened.) int active; } contents_section_state; -void ParsingStages::capture(I6T_kit *kit, text_stream *insertion, text_stream *segment) { +void ParsingStages::I6T_reader(I6T_kit *kit, text_stream *insertion, text_stream *segment) { TEMPORARY_TEXT(T) - ParsingStages::interpret(T, insertion, segment, -1, kit, NULL); + ParsingStages::interpret(T, insertion, segment, -1, kit, NULL); (*(kit->raw_callback))(T, kit); DISCARD_TEXT(T) } -void ParsingStages::interpret(OUTPUT_STREAM, text_stream *sf, +void ParsingStages::interpret(OUTPUT_STREAM, text_stream *sf, text_stream *segment_name, int N_escape, I6T_kit *kit, filename *Input_Filename) { if (Str::eq(segment_name, I"all")) { - for (int area=0; area<kit->no_i6t_file_areas; area++) { - pathname *P = Pathnames::up(kit->i6t_files[area]); + pathname *K; + LOOP_OVER_LINKED_LIST(K, pathname, kit->search_paths) { + pathname *P = Pathnames::up(K); web_md *Wm = WebMetadata::get(P, NULL, V2_SYNTAX, NULL, FALSE, TRUE, NULL); chapter_md *Cm; LOOP_OVER_LINKED_LIST(Cm, chapter_md, Wm->chapters_md) { section_md *Sm; LOOP_OVER_LINKED_LIST(Sm, section_md, Cm->sections_md) { filename *SF = Sm->source_file_for_section; - ParsingStages::interpret(OUT, sf, Sm->sect_title, N_escape, kit, SF); + ParsingStages::interpret(OUT, sf, Sm->sect_title, N_escape, kit, SF); } } } @@ -381,11 +489,11 @@ and therefore no interventions will have happened.) 
FILE *Input_File = NULL; if ((Str::len(segment_name) > 0) || (Input_Filename)) { - Open the I6 template file9.1; + Open the I6 template file11.1; comment = TRUE; } else comment = FALSE; - Interpret the I6T file9.2; + Interpret the I6T file11.2; if (Input_File) { if (DL) STREAM_FLUSH(DL); fclose(Input_File); } @@ -393,25 +501,26 @@ and therefore no interventions will have happened.) }

-

§9.1. We look for the .i6t files in a list of possible locations supplied as +

§11.1. We look for the .i6t files in a list of possible locations supplied as part of the I6T kit.

-

Open the I6 template file9.1 = +

Open the I6 template file11.1 =

     if (Input_Filename)
         Input_File = Filenames::fopen(Input_Filename, "r");
-    for (int area=0; area<kit->no_i6t_file_areas; area++)
+    pathname *P;
+    LOOP_OVER_LINKED_LIST(P, pathname, kit->search_paths)
         if (Input_File == NULL)
             Input_File = Filenames::fopen(
-                Filenames::in(kit->i6t_files[area], segment_name), "r");
+                Filenames::in(P, segment_name), "r");
     if (Input_File == NULL)
         PipelineErrors::kit_error("unable to open the template segment '%S'", segment_name);
 
- -

§9.2. Interpret the I6T file9.2 = +

+

§11.2. Interpret the I6T file11.2 =

@@ -420,22 +529,22 @@ part of the I6T kit.
     do {
         Str::clear(command);
         Str::clear(argument);
-        Read next character from I6T stream9.2.1;
+        Read next character from I6T stream11.2.1;
         NewCharacter: if (cr == EOF) break;
         if (((cr == '@') || (cr == '=')) && (col == 1)) {
             int inweb_syntax = -1;
-            if (cr == '=') Read the rest of line as an equals-heading9.2.3
-            else Read the rest of line as an at-heading9.2.2;
-            Act on the heading, going in or out of comment mode as appropriate9.2.4;
+            if (cr == '=') Read the rest of line as an equals-heading11.2.3
+            else Read the rest of line as an at-heading11.2.2;
+            Act on the heading, going in or out of comment mode as appropriate11.2.4;
             continue;
         }
-        if (comment == FALSE) Deal with material which isn't commentary9.2.5;
+        if (comment == FALSE) Deal with material which isn't commentary11.2.5;
     } while (cr != EOF);
     DISCARD_TEXT(command)
     DISCARD_TEXT(argument)
 
- -

§9.2.1. I6 template files are encoded as ISO Latin-1, not as Unicode UTF-8, so +

+

§11.2.1. I6 template files are encoded as ISO Latin-1, not as Unicode UTF-8, so ordinary fgetc is used, and no BOM marker is parsed. Lines are assumed to be terminated with either 0x0a or 0x0d. (Since blank lines are harmless, we take no trouble over 0a0d or 0d0a combinations.) The @@ -443,7 +552,7 @@ built-in template files, almost always the only ones used, are line terminated 0x0a in Unix fashion.

-

Read next character from I6T stream9.2.1 = +

Read next character from I6T stream11.2.1 =

@@ -453,8 +562,8 @@ terminated 0x0a
     } else cr = EOF;
     col++; if ((cr == 10) || (cr == 13)) col = 0;
 
- -

§9.2.2. Anything following an at-character in the first column is looked at to see if +

+

§11.2.2. Anything following an at-character in the first column is looked at to see if it's a heading, that is, an Inweb syntax. We recognise both @h and @p as heading markers, in order to accommodate both old and new Inweb syntaxes.

@@ -467,14 +576,14 @@ heading markers, in order to accommodate both old and new Inweb syntaxes. define INWEB_EQUALS_SYNTAX 6 define INWEB_EXTRACT_SYNTAX 7 -

Read the rest of line as an at-heading9.2.2 = +

Read the rest of line as an at-heading11.2.2 =

     TEMPORARY_TEXT(I6T_buffer)
     int i = 0, committed = FALSE, unacceptable_character = FALSE;
     while (i<MAX_I6T_LINE_LENGTH) {
-        Read next character from I6T stream9.2.1;
+        Read next character from I6T stream11.2.1;
         if ((committed == FALSE) && ((cr == 10) || (cr == 13) || (cr == ' '))) {
             if (Str::eq_wide_string(I6T_buffer, L"p"))
                 inweb_syntax = INWEB_PARAGRAPH_SYNTAX;
@@ -523,15 +632,15 @@ heading markers, in order to accommodate both old and new Inweb syntaxes.
     Str::copy(command, I6T_buffer);
     DISCARD_TEXT(I6T_buffer)
 
- -

§9.2.3. Read the rest of line as an equals-heading9.2.3 = +

+

§11.2.3. Read the rest of line as an equals-heading11.2.3 =

     TEMPORARY_TEXT(I6T_buffer)
     int i = 0;
     while (i<MAX_I6T_LINE_LENGTH) {
-        Read next character from I6T stream9.2.1;
+        Read next character from I6T stream11.2.1;
         if ((cr == 10) || (cr == 13)) break;
         PUT_TO(I6T_buffer, cr);
     }
@@ -556,13 +665,13 @@ heading markers, in order to accommodate both old and new Inweb syntaxes.
     }
     Regexp::dispose_of(&mr);
 
- -

§9.2.4. As can be seen, only a small minority of Inweb syntaxes are allowed: +

+

§11.2.4. As can be seen, only a small minority of Inweb syntaxes are allowed: in particular, no definitions or angle-bracketed macros. This reader is not a full-fledged tangler.

-

Act on the heading, going in or out of comment mode as appropriate9.2.4 = +

Act on the heading, going in or out of comment mode as appropriate11.2.4 =

@@ -598,20 +707,20 @@ in particular, no definitions
         case INWEB_FIGURE_SYNTAX: break;
     }
 
- -

§9.2.5. Deal with material which isn't commentary9.2.5 = +

+

§11.2.5. Deal with material which isn't commentary11.2.5 =

     if (cr == '{') {
-        Read next character from I6T stream9.2.1;
+        Read next character from I6T stream11.2.1;
         if (cr == '-') {
-            Read up to the next close brace as an I6T command and argument9.2.5.1;
+            Read up to the next close brace as an I6T command and argument11.2.5.1;
             if (Str::get_first_char(command) == '!') continue;
-            Act on I6T command and argument9.2.5.3;
+            Act on I6T command and argument11.2.5.3;
             continue;
         } else if ((cr == 'N') && (N_escape >= 0)) {
-            Read next character from I6T stream9.2.1;
+            Read next character from I6T stream11.2.1;
             if (cr == '}') {
                 WRITE("%d", N_escape);
                 continue;
@@ -624,9 +733,9 @@ in particular, no definitions
         }
     }
     if (cr == '(') {
-        Read next character from I6T stream9.2.1;
+        Read next character from I6T stream11.2.1;
         if (cr == '+') {
-            Read up to the next plus close-bracket as an I7 expression9.2.5.2;
+            Read up to the next plus close-bracket as an I7 expression11.2.5.2;
             continue;
         } else {  otherwise the open bracket was a literal
             PUT_TO(OUT, '(');
@@ -635,13 +744,13 @@ in particular, no definitions
     }
     PUT_TO(OUT, cr);
 
- -

§9.2.5.1. And here we read a normal command. The command name must not include } +

+

§11.2.5.1. And here we read a normal command. The command name must not include } or :. If there is no : then the argument is left unset (so that it will be the empty string: see above). The argument must not include }.

-

Read up to the next close brace as an I6T command and argument9.2.5.1 = +

Read up to the next close brace as an I6T command and argument11.2.5.1 =

@@ -649,25 +758,25 @@ be the empty string: see above). The argument must not include     Str::clear(argument);
     int com_mode = TRUE;
     while (TRUE) {
-        Read next character from I6T stream9.2.1;
+        Read next character from I6T stream11.2.1;
         if ((cr == '}') || (cr == EOF)) break;
         if ((cr == ':') && (com_mode)) { com_mode = FALSE; continue; }
         if (com_mode) PUT_TO(command, cr);
         else PUT_TO(argument, cr);
     }
 
- -

§9.2.5.2. And similarly, for the (+ ... +) notation used to mark I7 material +

+

§11.2.5.2. And similarly, for the (+ ... +) notation used to mark I7 material within I6:

-

Read up to the next plus close-bracket as an I7 expression9.2.5.2 = +

Read up to the next plus close-bracket as an I7 expression11.2.5.2 =

     TEMPORARY_TEXT(i7_exp)
     while (TRUE) {
-        Read next character from I6T stream9.2.1;
+        Read next character from I6T stream11.2.1;
         if (cr == EOF) break;
         if ((cr == ')') && (Str::get_last_char(i7_exp) == '+')) {
             Str::delete_last_character(i7_exp); break; }
@@ -677,16 +786,16 @@ within I6:
     DISCARD_TEXT(i7_exp)
         PipelineErrors::kit_error("use of (+ ... +) in the template has been withdrawn: '%S'", i7_exp);
 
- -

§9.2.5.3. Acting on I6T commands. Act on I6T command and argument9.2.5.3 = +

+

§11.2.5.3. Acting on I6T commands. Act on I6T command and argument11.2.5.3 =

-    Act on the I6T segment command9.2.5.3.1;
+    Act on the I6T segment command11.2.5.3.1;
     (*(kit->command_callback))(OUT, command, argument, kit);
 
- -

§9.2.5.3.1. The {-segment:...} command recursively calls the I6T interpreter on the +

+

§11.2.5.3.1. The {-segment:...} command recursively calls the I6T interpreter on the supplied I6T filename, which means it acts rather like #include in C. Note that because we pass the current output file handle of through to this new invocation, it will have the file open if we do, and closed if @@ -694,7 +803,7 @@ we do. It won't run in indexing mode, so {-open-index} and {-close-index}.

-

Act on the I6T segment command9.2.5.3.1 = +

Act on the I6T segment command11.2.5.3.1 =

@@ -702,14 +811,14 @@ safely between {-open-index}<
 internal_error("neurotica!");
         (*(kit->raw_callback))(OUT, kit);
         Str::clear(OUT);
-        ParsingStages::interpret(OUT, NULL, argument, -1, kit, NULL);
+        ParsingStages::interpret(OUT, NULL, argument, -1, kit, NULL);
         (*(kit->raw_callback))(OUT, kit);
         Str::clear(OUT);
         continue;
     }
 
- -

§10. Contents section.

+ +

§12. Contents section.

 void ParsingStages::read_contents(text_stream *text, text_file_position *tfp, void *state) {
diff --git a/inform7/Figures/memory-diagnostics.txt b/inform7/Figures/memory-diagnostics.txt
index 124e21db8..3b0bc0808 100644
--- a/inform7/Figures/memory-diagnostics.txt
+++ b/inform7/Figures/memory-diagnostics.txt
@@ -1,10 +1,10 @@
 Total memory consumption was 400127K = 391 MB
 
-60.9% was used for 2026653 objects, in 380059 frames in 305 x 800K = 244000K = 238 MB:
+60.9% was used for 2026654 objects, in 380060 frames in 305 x 800K = 244000K = 238 MB:
 
     10.2%  inter_tree_node_array                    58 x 8192 = 475136 objects, 41813824 bytes
      7.2%  text_stream_array                        5302 x 100 = 530200 objects, 29860864 bytes
-     4.7%  linked_list                              34789 objects, 19481840 bytes
+     4.7%  linked_list                              34790 objects, 19482400 bytes
      3.9%  inter_symbol_array                       140 x 1024 = 143360 objects, 16060800 bytes
      2.5%  parse_node                               129462 objects, 10356960 bytes
      1.8%  verb_conjugation                         160 objects, 7425280 bytes
@@ -255,5 +255,5 @@ Total memory consumption was 400127K = 391 MB
      ----  code generation workspace for objects    1336 bytes in 4 claims
      ----  emitter array storage                    161792 bytes in 2062 claims
 
-18.5% was overhead - 76160600 bytes = 74375K = 72 MB
+18.5% was overhead - 76160040 bytes = 74375K = 72 MB
 
diff --git a/inform7/Figures/timings-diagnostics.txt b/inform7/Figures/timings-diagnostics.txt
index c3bf30bf7..f6844d95a 100644
--- a/inform7/Figures/timings-diagnostics.txt
+++ b/inform7/Figures/timings-diagnostics.txt
@@ -1,36 +1,36 @@
 100.0% in inform7 run
-     55.6% in compilation to Inter
-         40.1% in //Sequence::undertake_queued_tasks//
-          3.5% in //MajorNodes::pre_pass//
+     56.0% in compilation to Inter
+         40.5% in //Sequence::undertake_queued_tasks//
+          3.4% in //MajorNodes::pre_pass//
           2.4% in //MajorNodes::pass_1//
-          2.1% in //RTPhrasebook::compile_entries//
+          2.2% in //RTPhrasebook::compile_entries//
           1.3% in //ImperativeDefinitions::assess_all//
           1.2% in //RTKindConstructors::compile//
           0.4% in //MajorNodes::pass_2//
           0.4% in //Sequence::undertake_queued_tasks//
+          0.4% in //Sequence::undertake_queued_tasks//
           0.4% in //World::stage_V//
           0.3% in //ImperativeDefinitions::compile_first_block//
-          0.3% in //Sequence::undertake_queued_tasks//
           0.1% in //CompletionModule::compile//
           0.1% in //InferenceSubjects::emit_all//
           0.1% in //RTKindConstructors::compile_permissions//
           0.1% in //Task::make_built_in_kind_constructors//
-          2.0% not specifically accounted for
-     42.3% in running Inter pipeline
-         11.9% in step preparation
+          2.1% not specifically accounted for
+     42.0% in running Inter pipeline
+         12.0% in step preparation
           9.7% in inter step 7/16: consolidate-text
-          8.2% in inter step 6/16: load-binary-kits
+          8.1% in inter step 6/16: load-binary-kits
           6.9% in inter step 16/16: generate inform6 -> auto.inf
-          1.5% in inter step 11/16: make-identifiers-unique
+          1.6% in inter step 11/16: make-identifiers-unique
           0.4% in inter step 12/16: reconcile-verbs
           0.3% in inter step 14/16: eliminate-redundant-operations
           0.3% in inter step 5/16: assimilate
           0.3% in inter step 8/16: resolve-external-symbols
+          0.3% in inter step 9/16: inspect-plugs
           0.1% in inter step 10/16: detect-indirect-calls
           0.1% in inter step 13/16: eliminate-redundant-labels
           0.1% in inter step 3/16: parse-linked-matter
           0.1% in inter step 4/16: resolve-conditional-compilation
-          0.1% in inter step 9/16: inspect-plugs
-          1.7% not specifically accounted for
-      1.7% in supervisor
+          1.4% not specifically accounted for
+      1.6% in supervisor
       0.4% not specifically accounted for
diff --git a/inter/pipeline-module/Chapter 3/Parsing Stages.w b/inter/pipeline-module/Chapter 3/Parsing Stages.w
index 20f67ae1f..5fc341ab2 100644
--- a/inter/pipeline-module/Chapter 3/Parsing Stages.w	
+++ b/inter/pipeline-module/Chapter 3/Parsing Stages.w	
@@ -5,6 +5,14 @@ imsertions made using Inform 7's low-level features, or after reading the
 source code for a kit.
 
 @h The two stages.
+These stages have more in common than they first appear to. Both convert I6T-syntax
+source code into a series of |SPLAT_IST| nodes in the Inter tree, with one
+such node for each different directive in the I6T source.
+
+The T in "I6T" stands for "template", which in the 2010s was a mechanism for
+providing I6 code to I7. That's not the arrangement any more, but the syntax
+(mostly) lives on, and so does the name I6T. Still, it's really just the same
+thing as Inform 6 code in an Inweb-style literate programming notation.
 
 =
 void ParsingStages::create_pipeline_stage(void) {
@@ -15,9 +23,8 @@ void ParsingStages::create_pipeline_stage(void) {
 }
 
 @ The stage |load-kit-source K| takes the kit |K|, looks for its source code
-(which will be Inform 6-syntax source code written in a literate programming
-notation) and reads this in to the current Inter tree, as a new top-level
-module.
+(text files written in I6T syntax) and reads this in to the current Inter tree,
+placing the resulting nodes in a new top-level module.
 
 =
 int ParsingStages::run_load_kit_source(pipeline_step *step) {
@@ -26,13 +33,13 @@ int ParsingStages::run_load_kit_source(pipeline_step *step) {
 	if (main_package) @;
 	I6T_kit kit;
 	@;
-	ParsingStages::capture(&kit, NULL, I"all");
+	ParsingStages::I6T_reader(&kit, NULL, I"all");
 	return TRUE;
 }
 
 @ So for example if we are reading the source for WorldModelKit, then the
 following creates the package |/main/WorldModelKit|, with package type |_module|.
-It's into this module that all the code will be read.
+It's into this module that the resulting |SPLAT_IST| nodes will be put.
 
 @ =
 	inter_bookmark IBM = Inter::Bookmarks::at_end_of_this_package(main_package);
@@ -42,117 +49,78 @@ It's into this module that all the code will be read.
 		module_name, 1, NULL, &template_p);
 	Site::set_assimilation_package(I, template_p);
 
-@ =
+@ The stage |parse-insertions| does the same thing, but on a much smaller scale,
+and reading raw I6T source code from |LINK_IST| nodes in the Inter tree rather
+than from an external file. There will only be a few of these, and with not much
+code in them, when the tree has been compiled by Inform: they arise from
+features such as
+= (text as Inform 7)
+Include (-
+	[ CuriousFunction;
+		print "Curious!";
+	];
+-).
+=
+The //inform7// code does not contain a compiler from I6T down to Inter, so
+it can only leave us these unparsed fragments as |LINK_IST| nodes. We take
+it from there.
+
+=
 int ParsingStages::run_parse_insertions(pipeline_step *step) {
 	inter_tree *I = step->ephemera.repository;
 	I6T_kit kit;
 	@;
-	InterTree::traverse(I, ParsingStages::catch_all_visitor, &kit, NULL, 0);
+	InterTree::traverse(I, ParsingStages::visit_insertions, &kit, NULL, LINK_IST);
 	return TRUE;
 }
 
-void ParsingStages::catch_all_visitor(inter_tree *I, inter_tree_node *P, void *state) {
-	if (P->W.data[ID_IFLD] == LINK_IST) {
-		text_stream *insertion = Inode::ID_to_text(P, P->W.data[TO_RAW_LINK_IFLD]);
-		#ifdef CORE_MODULE
-		current_sentence = (parse_node *) Inode::ID_to_ref(P, P->W.data[REF_LINK_IFLD]);
-		#endif
-		I6T_kit *kit = (I6T_kit *) state;
-		ParsingStages::capture(kit, insertion, NULL);
-	}
+void ParsingStages::visit_insertions(inter_tree *I, inter_tree_node *P, void *state) {
+	text_stream *insertion = Inode::ID_to_text(P, P->W.data[TO_RAW_LINK_IFLD]);
+	#ifdef CORE_MODULE
+	current_sentence = (parse_node *) Inode::ID_to_ref(P, P->W.data[REF_LINK_IFLD]);
+	#endif
+	I6T_kit *kit = (I6T_kit *) state;
+	ParsingStages::I6T_reader(kit, insertion, NULL);
 }
 
+@ So, then, both of those stages rely on (i) making something called an I6T kit,
+then (ii) calling //ParsingStages::I6T_reader//.
+
+Here's where we make the kit, which is really just a collection of settings for
+the I6T-reader. That comes down to:
+
+(a) the place to put any nodes generated,
+(b) what to do with I6 source code, or with commands embedded in it, and
+(c) which file-system paths to look inside when reading from files rather
+than raw text in memory.
+
+For (c), note that if a kit is in directory |K| then its source files are
+in |K/Sections|.
+
 @ =
-	linked_list *PP = step->ephemera.the_PP;
-	inter_package *template_package = Site::ensure_assimilation_package(I, RunningPipelines::get_symbol(step, plain_ptype_RPSYM));	
-	
-	inter_bookmark link_bookmark =
-		Inter::Bookmarks::at_end_of_this_package(template_package);
-
-	kit = ParsingStages::kit_out(&link_bookmark, &(ParsingStages::receive_raw),  &(ParsingStages::receive_command), NULL);
-	kit.no_i6t_file_areas = LinkedLists::len(PP);
+	inter_package *assimilation_package = Site::ensure_assimilation_package(I,
+		RunningPipelines::get_symbol(step, plain_ptype_RPSYM));
+	inter_bookmark assimilation_point =
+		Inter::Bookmarks::at_end_of_this_package(assimilation_package);
+	linked_list *L = NEW_LINKED_LIST(pathname);
 	pathname *P;
-	int i=0;
-	LOOP_OVER_LINKED_LIST(P, pathname, PP)
-		kit.i6t_files[i++] = Pathnames::down(P, I"Sections");
+	LOOP_OVER_LINKED_LIST(P, pathname, step->ephemera.the_PP)
+		ADD_TO_LINKED_LIST(Pathnames::down(P, I"Sections"), pathname, L);
+	kit = ParsingStages::kit_out(&assimilation_point,
+		&(ParsingStages::receive_raw), &(ParsingStages::receive_command), L, NULL);
 
-@
+@ Once the I6T reader has unpacked the literate-programming notation, it will
+reduce the I6T code to pure Inform 6 source together with (perhaps) a handful of
+commands in braces. Our kit must say what to do with each of these outputs.
 
-@d IGNORE_WS_FILTER_BIT 1
-@d DQUOTED_FILTER_BIT 2
-@d SQUOTED_FILTER_BIT 4
-@d COMMENTED_FILTER_BIT 8
-@d ROUTINED_FILTER_BIT 16
-@d CONTENT_ON_LINE_FILTER_BIT 32
-
-@d SUBORDINATE_FILTER_BITS (COMMENTED_FILTER_BIT + SQUOTED_FILTER_BIT + DQUOTED_FILTER_BIT + ROUTINED_FILTER_BIT)
+The easy part: what to do when we find a command in I6T source. In pre-Inter
+versions of Inform, when I6T was just a way of expressing Inform 6 code but
+with some braced commands mixed in, there were lots of legal if enigmatic
+syntaxes in use. Now those have all gone, so in all cases we issue an error:
 
 =
-void ParsingStages::receive_raw(text_stream *S, I6T_kit *kit) {
-	text_stream *R = Str::new();
-	int mode = IGNORE_WS_FILTER_BIT;
-	LOOP_THROUGH_TEXT(pos, S) {
-		wchar_t c = Str::get(pos);
-		if ((c == 10) || (c == 13)) c = '\n';
-		if (mode & IGNORE_WS_FILTER_BIT) {
-			if ((c == '\n') || (Characters::is_whitespace(c))) continue;
-			mode -= IGNORE_WS_FILTER_BIT;
-		}
-		if ((c == '!') && (!(mode & (DQUOTED_FILTER_BIT + SQUOTED_FILTER_BIT)))) {
-			mode = mode | COMMENTED_FILTER_BIT;
-		}
-		if (mode & COMMENTED_FILTER_BIT) {
-			if (c == '\n') {
-				mode -= COMMENTED_FILTER_BIT;
-				if (!(mode & CONTENT_ON_LINE_FILTER_BIT)) continue;
-			}
-			else continue;
-		}
-		if ((c == '[') && (!(mode & SUBORDINATE_FILTER_BITS))) {
-			mode = mode | ROUTINED_FILTER_BIT;
-		}
-		if (mode & ROUTINED_FILTER_BIT) {
-			if ((c == ']') && (!(mode & (DQUOTED_FILTER_BIT + SQUOTED_FILTER_BIT + COMMENTED_FILTER_BIT)))) mode -= ROUTINED_FILTER_BIT;
-		}
-		if ((c == '\'') && (!(mode & (DQUOTED_FILTER_BIT + COMMENTED_FILTER_BIT)))) {
-			if (mode & SQUOTED_FILTER_BIT) mode -= SQUOTED_FILTER_BIT;
-			else mode = mode | SQUOTED_FILTER_BIT;
-		}
-		if ((c == '\"') && (!(mode & (SQUOTED_FILTER_BIT + COMMENTED_FILTER_BIT)))) {
-			if (mode & DQUOTED_FILTER_BIT) mode -= DQUOTED_FILTER_BIT;
-			else mode = mode | DQUOTED_FILTER_BIT;
-		}
-		if (c != '\n') {
-			if (Characters::is_whitespace(c) == FALSE) mode = mode | CONTENT_ON_LINE_FILTER_BIT;
-		} else {
-			if (mode & CONTENT_ON_LINE_FILTER_BIT) mode = mode - CONTENT_ON_LINE_FILTER_BIT;
-			else if (!(mode & SUBORDINATE_FILTER_BITS)) continue;
-		}
-		PUT_TO(R, c);
-		if ((c == ';') && (!(mode & SUBORDINATE_FILTER_BITS))) {
-			ParsingStages::chunked_raw(R, kit);
-			mode = IGNORE_WS_FILTER_BIT;
-		}
-	}
-	ParsingStages::chunked_raw(R, kit);
-	Str::clear(S);
-}
-
-void ParsingStages::chunked_raw(text_stream *S, I6T_kit *kit) {
-	if (Str::len(S) == 0) return;
-	PUT_TO(S, '\n');
-	ParsingStages::entire_splat(kit->IBM, I"template", S, (inter_ti) (Inter::Bookmarks::baseline(kit->IBM) + 1));
-	Str::clear(S);
-}
-
-void ParsingStages::entire_splat(inter_bookmark *IBM, text_stream *origin, text_stream *content, inter_ti level) {
-	inter_ti SID = Inter::Warehouse::create_text(Inter::Bookmarks::warehouse(IBM), Inter::Bookmarks::package(IBM));
-	text_stream *glob_storage = Inter::Warehouse::get_text(Inter::Bookmarks::warehouse(IBM), SID);
-	Str::copy(glob_storage, content);
-	Produce::guard(Inter::Splat::new(IBM, SID, 0, level, 0, NULL));
-}
-
-void ParsingStages::receive_command(OUTPUT_STREAM, text_stream *command, text_stream *argument, I6T_kit *kit) {
+void ParsingStages::receive_command(OUTPUT_STREAM, text_stream *command,
+	text_stream *argument, I6T_kit *kit) {
 	if ((Str::eq_wide_string(command, L"plugin")) ||
 		(Str::eq_wide_string(command, L"type")) ||
 		(Str::eq_wide_string(command, L"open-file")) ||
@@ -177,54 +145,175 @@ void ParsingStages::receive_command(OUTPUT_STREAM, text_stream *command, text_st
 		(Str::eq_wide_string(command, L"testing-routine")) ||
 		(Str::eq_wide_string(command, L"testing-command"))) {
 		LOG("command: <%S> argument: <%S>\n", command, argument);
-		PipelineErrors::kit_error("the template command '{-%S}' has been withdrawn in this version of Inform", command);
+		PipelineErrors::kit_error(
+			"the template command '{-%S}' has been withdrawn in this version of Inform",
+			command);
 	} else {
 		LOG("command: <%S> argument: <%S>\n", command, argument);
 		PipelineErrors::kit_error("no such {-command} as '%S'", command);
 	}
 }
 
-@h I6T kits.
-These are used to abstract calls to the I6T reader, so that customers of
-varying dispositions can do different things with the code parsed.
+@ We very much do not ignore the raw I6 code read in, though. When the reader
+gives us a chunk of this, we parse through it with a simple finite-state machine.
+This can be summarised as "divide the code up at |;| boundaries, sending each
+piece in turn to //ParsingStages::splat//". But of course we do not want to
+react to semicolons in quoted text or comments, and in fact we also do not
+want to react to semicolons used as statement dividers inside I6 routines (i.e.,
+functions). So for example
+= (text as Inform 6)
+Global aspic = "this; and that";
+! Don't react to this; I'm only a comment
+[ Hello; print "Hello; goodbye.^"; ];
+=
+would be divided into just two splats,
+= (text as Inform 6)
+Global aspic = "this; and that";
+=
+and
+= (text as Inform 6)
+[ Hello; print "Hello; goodbye.^"; ];
+=
+(And the comment would be stripped out entirely.)
 
-@ =
+@d IGNORE_WS_I6TBIT 1
+@d DQUOTED_I6TBIT 2
+@d SQUOTED_I6TBIT 4
+@d COMMENTED_I6TBIT 8
+@d ROUTINED_I6TBIT 16
+@d CONTENT_ON_LINE_I6TBIT 32
+
+@d SUBORDINATE_I6TBITS
+	(COMMENTED_I6TBIT + SQUOTED_I6TBIT + DQUOTED_I6TBIT + ROUTINED_I6TBIT)
+
+=
+void ParsingStages::receive_raw(text_stream *S, I6T_kit *kit) {
+	text_stream *R = Str::new();
+	int mode = IGNORE_WS_I6TBIT;
+	LOOP_THROUGH_TEXT(pos, S) {
+		wchar_t c = Str::get(pos);
+		if ((c == 10) || (c == 13)) c = '\n';
+		if (mode & IGNORE_WS_I6TBIT) {
+			if ((c == '\n') || (Characters::is_whitespace(c))) continue;
+			mode -= IGNORE_WS_I6TBIT;
+		}
+		if ((c == '!') && (!(mode & (DQUOTED_I6TBIT + SQUOTED_I6TBIT)))) {
+			mode = mode | COMMENTED_I6TBIT;
+		}
+		if (mode & COMMENTED_I6TBIT) {
+			if (c == '\n') {
+				mode -= COMMENTED_I6TBIT;
+				if (!(mode & CONTENT_ON_LINE_I6TBIT)) continue;
+			}
+			else continue;
+		}
+		if ((c == '[') && (!(mode & SUBORDINATE_I6TBITS))) {
+			mode = mode | ROUTINED_I6TBIT;
+		}
+		if (mode & ROUTINED_I6TBIT) {
+			if ((c == ']') && (!(mode & (DQUOTED_I6TBIT + SQUOTED_I6TBIT + COMMENTED_I6TBIT))))
+				mode -= ROUTINED_I6TBIT;
+		}
+		if ((c == '\'') && (!(mode & (DQUOTED_I6TBIT + COMMENTED_I6TBIT)))) {
+			if (mode & SQUOTED_I6TBIT) mode -= SQUOTED_I6TBIT;
+			else mode = mode | SQUOTED_I6TBIT;
+		}
+		if ((c == '\"') && (!(mode & (SQUOTED_I6TBIT + COMMENTED_I6TBIT)))) {
+			if (mode & DQUOTED_I6TBIT) mode -= DQUOTED_I6TBIT;
+			else mode = mode | DQUOTED_I6TBIT;
+		}
+		if (c != '\n') {
+			if (Characters::is_whitespace(c) == FALSE)
+				mode = mode | CONTENT_ON_LINE_I6TBIT;
+		} else {
+			if (mode & CONTENT_ON_LINE_I6TBIT) mode = mode - CONTENT_ON_LINE_I6TBIT;
+			else if (!(mode & SUBORDINATE_I6TBITS)) continue;
+		}
+		PUT_TO(R, c);
+		if ((c == ';') && (!(mode & SUBORDINATE_I6TBITS))) {
+			ParsingStages::splat(R, kit);
+			mode = IGNORE_WS_I6TBIT;
+		}
+	}
+	ParsingStages::splat(R, kit);
+	Str::clear(S);
+}
+
+@ Each of those "splats" becomes a |SPLAT_IST| node in the tree at the
+current insertion point recorded in the kit.
+
+Note that this function empties the splat buffer |R| before exiting.
+
+=
+void ParsingStages::splat(text_stream *R, I6T_kit *kit) {
+	if (Str::len(R) > 0) {
+		PUT_TO(R, '\n');
+		inter_ti SID = Inter::Warehouse::create_text(
+			Inter::Bookmarks::warehouse(kit->IBM), Inter::Bookmarks::package(kit->IBM));
+		text_stream *textual_storage =
+			Inter::Warehouse::get_text(Inter::Bookmarks::warehouse(kit->IBM), SID);
+		Str::copy(textual_storage, R);
+		Produce::guard(Inter::Splat::new(kit->IBM, SID, 0,
+			(inter_ti) (Inter::Bookmarks::baseline(kit->IBM) + 1), 0, NULL));
+		Str::clear(R);
+	}
+}
+
+@ And that's it: the result of these stages is just to break the I6T source they
+found up into individual directives, and put them into the tree as |SPLAT_IST| nodes.
+No effort has been made yet to see what directives they are. Subsequent stages
+will handle that.
+
+@h The I6T Reader.
+The rest of this section, then, is a general-purpose reader of I6T-syntax code.
+Although it is only used for one purpose in the Inform code base, it once had
+multiple uses, and so it's written quite flexibly. There seems no reason to
+get rid of that flexibility: perhaps we'll use it again some day.
+
+So, then, this is the parcel of settings for controlling the I6T reader:
+
+=
 typedef struct I6T_kit {
 	struct inter_bookmark *IBM;
-	int no_i6t_file_areas;
-	struct pathname *i6t_files[16];
 	void (*raw_callback)(struct text_stream *, struct I6T_kit *);
-	void (*command_callback)(struct text_stream *, struct text_stream *, struct text_stream *, struct I6T_kit *);
+	void (*command_callback)(struct text_stream *, struct text_stream *,
+		struct text_stream *, struct I6T_kit *);
 	void *I6T_state;
+	struct linked_list *search_paths; /* of |pathname| */
 } I6T_kit;
 
-@ =
-I6T_kit ParsingStages::kit_out(inter_bookmark *IBM, void (*A)(struct text_stream *, struct I6T_kit *),
-	void (*B)(struct text_stream *, struct text_stream *, struct text_stream *, struct I6T_kit *),
-	void *C) {
+@ We actually don't use this facility, but a kit contains an |I6T_state| which is
+shared across the calls to the callback functions. When a kit is created, the
+initial state must be supplied; after that, it's updated only by the callback
+functions supplied.
+
+=
+I6T_kit ParsingStages::kit_out(inter_bookmark *IBM,
+	void (*A)(struct text_stream *, struct I6T_kit *),
+	void (*B)(struct text_stream *, struct text_stream *,
+		struct text_stream *, struct I6T_kit *),
+	linked_list *search_list, void *initial_state) {
 	I6T_kit kit;
 	kit.IBM = IBM;
 	kit.raw_callback = A;
 	kit.command_callback = B;
-	kit.I6T_state = C;
-	kit.no_i6t_file_areas = 0;
+	kit.I6T_state = initial_state;
+	kit.search_paths = search_list;
 	return kit;
 }
 
-@h Syntax of I6T files.
-The syntax of these files has been designed so that a valid I6T file is
-also a valid Inweb section file. (Inweb now has two formats, an old and a
-new one: here we can read either, though the I6T sources in the main Inform
-distribution have been modernised to the new syntax.) Many Inweb syntaxes
-are, however, not allowed in I6T: really, you should use only |@h| headings
-and the |=| sign to divide commentary from text. Macros and definitions, in
-particular, are not permitted. This means that no real tangling is required
-to make the I6T files.
+@ I6T files use a literate programming notation which is, in effect, a much
+simplified version of Inweb's. (Note that Inweb can therefore read kits as
+if they were webs, and we use that to weave them for the source website.)
+
+Many Inweb syntaxes are, however, not allowed in I6T: really, you should use
+only |@h| headings and the |=| sign to divide commentary from text. Macros and
+definitions, in particular, are not permitted; I6T is not really tangled as such.
 
 The entire range of possibilities is shown here:
 = (text as Inweb)
 	Circuses.
-	 
+	
 	This hypothetical I6T file provides support for holding circuses.
 	 
 	@h Start.
@@ -259,7 +348,7 @@ typedef struct contents_section_state {
 	int active;
 } contents_section_state;
 
-void ParsingStages::capture(I6T_kit *kit, text_stream *insertion, text_stream *segment) {
+void ParsingStages::I6T_reader(I6T_kit *kit, text_stream *insertion, text_stream *segment) {
 	TEMPORARY_TEXT(T)
 	ParsingStages::interpret(T, insertion, segment, -1, kit, NULL);
 	(*(kit->raw_callback))(T, kit);
@@ -269,8 +358,9 @@ void ParsingStages::capture(I6T_kit *kit, text_stream *insertion, text_stream *s
 void ParsingStages::interpret(OUTPUT_STREAM, text_stream *sf,
 	text_stream *segment_name, int N_escape, I6T_kit *kit, filename *Input_Filename) {
 	if (Str::eq(segment_name, I"all")) {
-		for (int area=0; area<kit->no_i6t_file_areas; area++) {
-			pathname *P = Pathnames::up(kit->i6t_files[area]);
+		pathname *K;
+		LOOP_OVER_LINKED_LIST(K, pathname, kit->search_paths) {
+			pathname *P = Pathnames::up(K);
 			web_md *Wm = WebMetadata::get(P, NULL, V2_SYNTAX, NULL, FALSE, TRUE, NULL);
 			chapter_md *Cm;
 			LOOP_OVER_LINKED_LIST(Cm, chapter_md, Wm->chapters_md) {
@@ -306,10 +396,11 @@ part of the I6T kit.
 @ =
 	if (Input_Filename)
 		Input_File = Filenames::fopen(Input_Filename, "r");
-	for (int area=0; area<kit->no_i6t_file_areas; area++)
+	pathname *P;
+	LOOP_OVER_LINKED_LIST(P, pathname, kit->search_paths)
 		if (Input_File == NULL)
 			Input_File = Filenames::fopen(
-				Filenames::in(kit->i6t_files[area], segment_name), "r");
+				Filenames::in(P, segment_name), "r");
 	if (Input_File == NULL)
 		PipelineErrors::kit_error("unable to open the template segment '%S'", segment_name);