Inbuild now reads bibliographic sentences

2024-06-26 04:00:43 +03:00 · 2020-03-04 19:34:23 +00:00 · 2020-03-04 19:34:23 +00:00 · c65c3ac931
parent 39a77b4bae
commit c65c3ac931
30 changed files with 438 additions and 392 deletions
--- a/inbuild/Chapter
+++ b/inbuild/Chapter
@ -36,7 +36,6 @@ int main(int argc, char **argv) {
 	targets = NEW_LINKED_LIST(inbuild_copy);
 	@<Read the command line@>;
 	
-	path_to_inbuild = Pathnames::installation_path("INBUILD_PATH", I"inbuild");
 	if (Str::len(unit_test) > 0) dry_run_mode = TRUE;
 	int use = SHELL_METHODOLOGY;
 	if (dry_run_mode) use = DRY_RUN_METHODOLOGY;
@ -131,6 +130,12 @@ int main(int argc, char **argv) {
 	if (LinkedLists::len(unsorted_nest_list) == 0)
 		Inbuild::add_nest(
 			Pathnames::from_text(I"inform7/Internal"), INTERNAL_NEST_TAG);
+
+	path_to_inbuild = Pathnames::installation_path("INBUILD_PATH", I"inbuild");
+	pathname *P = Pathnames::subfolder(path_to_inbuild, I"Tangled");
+	filename *S = Filenames::in_folder(P, I"Syntax.preform");
+	wording W = Preform::load_from_file(S);
+	Preform::parse_preform(W, FALSE);
 	
 	CommandLine::play_back_log();
 	inbuild_copy *proj = NULL, *C;
@ -225,6 +230,7 @@ vocabulary_meaning Main::ignore(vocabulary_entry *ve) {
@d PREFORM_LANGUAGE_TYPE void
@d PARSE_TREE_TRAVERSE_TYPE void
@d SENTENCE_NODE Main::sentence_level
+@d PARSE_TREE_METADATA_SETUP SourceText::node_metadata

 =
 int Main::sentence_level(node_type_t t) {
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -206,7 +206,7 @@ inbuild_copy *Inbuild::optioneering_complete(inbuild_copy *C, int compile_only)
 	inbuild_phase = TINKERING_INBUILD_PHASE;
 	Inbuild::sort_nest_list();
 	inbuild_phase = NESTED_INBUILD_PHASE;
-	if (project) Projects::set_to_English(project);
+	if (project) Projects::read_source_text_for(project);
 	Inbuild::pass_kit_requests();
 	inbuild_phase = PROJECTED_INBUILD_PHASE;

@ -486,7 +486,7 @@ inform_project *Inbuild::create_shared_project(inbuild_copy *C) {

 =
 inform_project *Inbuild::project(void) {
-	RUN_ONLY_FROM_PHASE(PROJECTED_INBUILD_PHASE)
+	RUN_ONLY_FROM_PHASE(TINKERING_INBUILD_PHASE)
 	return shared_project;
 }

--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -34,6 +34,7 @@ Setting up the use of this module.
@e extension_dictionary_entry_MT
@e known_extension_clash_MT
@e build_skill_MT
+@e control_structure_phrase_MT

 =
 ALLOCATE_INDIVIDUALLY(inform_kit)
@ -61,6 +62,7 @@ ALLOCATE_INDIVIDUALLY(copy_error)
 ALLOCATE_INDIVIDUALLY(extension_dictionary_entry)
 ALLOCATE_INDIVIDUALLY(known_extension_clash)
 ALLOCATE_INDIVIDUALLY(build_skill)
+ALLOCATE_INDIVIDUALLY(control_structure_phrase)

 ALLOCATE_IN_ARRAYS(inbuild_work_database_entry, 100)

@ -83,6 +85,7 @@ void InbuildModule::start(void) {
 	Inform7Skill::create();
 	Inform6Skill::create();
 	InblorbSkill::create();
+	ControlStructures::create_standard();
 }

@
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -157,6 +157,7 @@ typedef struct copy_error {
 	struct text_stream *details;
 	int details_N;
 	struct wording details_W;
+	struct parse_node *details_node;
 	wchar_t *word;
 	MEMORY_MANAGEMENT
 } copy_error;
@ -170,6 +171,7 @@ copy_error *Copies::new_error(int cat, text_stream *NB) {
 	CE->details = NULL;
 	CE->details_N = -1;
 	CE->details_W = EMPTY_WORDING;
+	CE->details_node = NULL;
 	CE->pos = TextFiles::nowhere();
 	CE->copy = NULL;
 	CE->word = NULL;
@ -245,6 +247,8 @@ void Copies::write_problem(OUTPUT_STREAM, copy_error *CE) {
 					WRITE("extension has an 'ends here' but no 'begins here'"); break;
 				case ExtMultipleEndsHere_SYNERROR:
 					WRITE("extension has multiple 'ends here' sentences"); break;
+				case BadTitleSentence_SYNERROR:
+					WRITE("bibliographic sentence at the start is malformed"); break;
 				default:
 					WRITE("syntax error"); break;
 			}
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -135,7 +135,6 @@ calls.
 	Feeds::feed_text(L"This sentence provides a firebreak, no more. ");
 	E = Extensions::Documentation::load(work);
 	if (E == NULL) return 0; /* shouldn't happen: it was there only moments ago */
-WRITE_TO(STDOUT, "Wel well %X\n", work);
 	Copies::read_source_text_for(E->as_copy);
 	Extensions::Documentation::write_extension_documentation(NULL, E, census_mode);

--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -309,7 +309,7 @@ void Extensions::read_source_text_for(inform_extension *E) {
 		E->read_into_file->your_ref = STORE_POINTER_inbuild_copy(E->as_copy);
 		wording EXW = E->read_into_file->text_read;
 		if (Wordings::nonempty(EXW)) @<Break the extension's text into body and documentation@>;
-		Sentences::break(E->body_text, TRUE, E->as_copy);
+		Sentences::break(E->body_text, TRUE, E->as_copy, -1);
 		E->body_text_unbroken = FALSE;
 	}
 }
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -195,12 +195,6 @@ void Kits::early_source_text(OUTPUT_STREAM, inform_kit *K) {
 	if (K->early_source) WRITE("%S\n\n", K->early_source);
 }

-int Kits::number_of_early_fed_sentences(inform_kit *K) {
-	int N = LinkedLists::len(K->extensions);
-	if (K->early_source) N++;
-	return N;
-}
-
 linked_list *Kits::inter_paths(void) {
 	linked_list *inter_paths = NEW_LINKED_LIST(pathname);
 	inbuild_nest *N;
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -234,14 +234,6 @@ void Projects::early_source_text(OUTPUT_STREAM, inform_project *project) {
 		Kits::early_source_text(OUT, K);
 }

-int Projects::number_of_early_fed_sentences(inform_project *project) {
-	int N = 0;
-	inform_kit *K;
-	LOOP_OVER_LINKED_LIST(K, inform_kit, project->kits_to_include)
-		N += Kits::number_of_early_fed_sentences(K);
-	return N;
-}
-
 #ifdef CODEGEN_MODULE
 linked_list *Projects::list_of_inter_libraries(inform_project *project) {
 	linked_list *requirements_list = NEW_LINKED_LIST(link_instruction);
@ -335,9 +327,11 @@ void Projects::construct_graph(inform_project *project) {

@

+@e BadTitleSentence_SYNERROR
+
 =
 void Projects::read_source_text_for(inform_project *project) {
-	int wc = lexer_wordcount;
+	int wc = lexer_wordcount, bwc = -1;
 	TEMPORARY_TEXT(early);
 	Projects::early_source_text(early, project);
 	if (Str::len(early) > 0) Feeds::feed_stream(early);
@ -350,19 +344,99 @@ void Projects::read_source_text_for(inform_project *project) {
 		build_vertex *N;
 		LOOP_OVER_LINKED_LIST(N, build_vertex, L) {
 			filename *F = N->buildable_if_internal_file;
+			bwc = lexer_wordcount;
 			N->read_as = SourceText::read_file(project->as_copy, F, N->annotation,
 				FALSE, TRUE);
 		}
 	}
 	ParseTree::plant_parse_tree();
 	int l = ParseTree::push_attachment_point(tree_root);
-	Sentences::break(Wordings::new(wc, lexer_wordcount-1), FALSE, project->as_copy);
+	Sentences::break(Wordings::new(wc, lexer_wordcount-1), FALSE, project->as_copy, bwc);
 	ParseTree::pop_attachment_point(l);
 	#ifdef CORE_MODULE
 	StructuralSentences::add_inventions_heading();
 	#endif
+	if (project->language_of_play == NULL) Projects::set_to_English(project);
 }

+@ It might seem sensible to parse the opening sentence of the source text,
+the bibliographic sentence giving title and author, by looking at the result
+of sentence-breaking above. But this isn't fast enough, because the sentence
+also specifies the language used, and we need to know of any non-English
+choice immediately. So a special hook in the |syntax| module calls the
+following routine as soon as a |BIBLIOGRAPHIC_NT| sentence is found; thus,
+it happens during the call to |Sentences::break| above.
+
+@ =
+void Projects::notify_of_bibliographic_sentence(inform_project *project, parse_node *PN) {
+	/* Parse the text of the bibliographic sentence node |PN| as a titling line.
+	On a match, record title, author and (if given) language of play on the
+	project; otherwise attach a |BadTitleSentence_SYNERROR| to the project's copy. */
+	wording W = ParseTree::get_text(PN);
+	if (<titling-line>(W)) {
+		/* NOTE(review): unlike the author name, the title stream is not checked
+		for NULL here -- presumably it always exists by now; confirm */
+		text_stream *T = project->as_copy->edition->work->title;
+		/* make sure there is an author stream for the paragraphs below to write into */
+		if (project->as_copy->edition->work->author_name == NULL)
+			project->as_copy->edition->work->author_name = Str::new();
+		text_stream *A = project->as_copy->edition->work->author_name;
+		/* <titling-line> sets its result pointer to the language only when the
+		"( in <natural-language> )" form matched, and to NULL otherwise */
+		inform_language *L = <<rp>>;
+		if (L) {
+			Projects::set_language_of_play(project, L);
+			LOG("Language of play: %S\n", L->as_copy->edition->work->title);
+		}
+		/* both paragraphs below refer to the locals |T| and |A| by name */
+		@<Extract title and author name wording@>;
+		@<Dequote the title and, perhaps, author name@>;
+	} else {
+		/* malformed sentence: remember the offending node so that the problem
+		can be reported against it later */
+		copy_error *CE = Copies::new_error(SYNTAX_CE, NULL);
+		CE->error_subcategory = BadTitleSentence_SYNERROR;
+		CE->details_node = PN;
+		Copies::attach(project->as_copy, CE);
+	}
+}
+
+@ This is what the top line of the main source text should look like, if it's
+to declare the title and author.
+
+=
+<titling-line> ::=
+	<plain-titling-line> ( in <natural-language> ) |	==> R[1]; *XP = RP[2];
+	<plain-titling-line>								==> R[1]; *XP = NULL;
+
+<plain-titling-line> ::=
+	{<quoted-text-without-subs>} by ... |	==> TRUE
+	{<quoted-text-without-subs>}			==> FALSE
+
+@<Extract title and author name wording@> =
+	/* Runs straight after a successful <titling-line> match: copies the title
+	wording into |T| and, when present, the author wording into |A|. */
+	wording TW = GET_RW(<plain-titling-line>, 1);
+	wording AW = EMPTY_WORDING;
+	/* the integer result is TRUE only for the "... by ..." form of <plain-titling-line> */
+	if (<<r>>) AW = GET_RW(<plain-titling-line>, 2);
+	Str::clear(T);
+	WRITE_TO(T, "%+W", TW);
+	/* with no author wording, whatever |A| already holds is left untouched */
+	if (Wordings::nonempty(AW)) {
+		Str::clear(A);
+		WRITE_TO(A, "%+W", AW);
+	}
+
+@ The author is sometimes given outside of quotation marks:
+
+>> "The Large Scale Structure of Space-Time" by Lindsay Lohan
+
+But not always:
+
+>> "Greek Rural Postmen and Their Cancellation Numbers" by "will.i.am"
+
+@<Dequote the title and, perhaps, author name@> =
+	/* Title first: trim it, and if it both opens and closes with a double-quote,
+	strip that pair and trim again. */
+	Str::trim_white_space(T);
+	if (Str::get_first_char(T) == '\"')
+		if (Str::get_last_char(T) == '\"') {
+			Str::delete_first_character(T);
+			Str::delete_last_character(T);
+			Str::trim_white_space(T);
+		}
+	LOG("Title: %S\n", T);
+
+	/* Then exactly the same treatment for the author, who is logged only if
+	something is left afterwards. */
+	Str::trim_white_space(A);
+	if (Str::get_first_char(A) == '\"')
+		if (Str::get_last_char(A) == '\"') {
+			Str::delete_first_character(A);
+			Str::delete_last_character(A);
+			Str::trim_white_space(A);
+		}
+	if (Str::len(A) > 0) LOG("Author: %S\n", A);
+
@ When Inform reads the (optional!) Options file, very early in its run, it
 tries to obey any use options in the file right away -- earlier even than
 <structural-sentence>. It spots these, very crudely, as sentences which
@ -373,6 +447,8 @@ stop -- it's needed before sentence-breaking has even taken place.
 <use-option-sentence-shape> ::=
 	use ... .

+wording options_file_wording = EMPTY_WORDING_INIT;
+
 void Projects::read_further_mandatory_text(filename *F) {
 	feed_t id = Feeds::begin();
 	TextFiles::read(F, TRUE,
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -0,0 +1,224 @@
+[ControlStructures::] Control Structures.
+
+To specify the syntax of control structures such as repeat, if and otherwise.
+
+@ Certain phrases are "structural": otherwise, if, repeat, while and so
+on. These have different expectations in terms of the layout of surrounding
+phrases in rule or phrase definitions, and the following structure defines
+the relevant behaviour. (The contents are static.)
+
+=
+typedef struct control_structure_phrase {
+	struct control_structure_phrase *subordinate_to; /* parent structure, e.g. "otherwise" belongs to "if"; |NULL| if none */
+	int indent_subblocks; /* set only for the switch form -- exact layout meaning not shown here; confirm with users of this field */
+	int body_empty_except_for_subordinates; /* set only for the switch form */
+	int used_at_stage; /* -1 by default; 0, 1 or 2 for subordinate clauses -- presumably their permitted order within the parent; confirm */
+	int is_a_loop; /* set for "repeat" and "while" */
+	int requires_new_syntax; /* if set, using this phrase marks the routine as requiring the colon syntax */
+	int allow_run_on; /* set for switch cases and the default case -- meaning not shown here; confirm */
+	wchar_t *keyword; /* text returned by |ControlStructures::incipit|; |L"<none>"| unless overridden */
+	MEMORY_MANAGEMENT
+} control_structure_phrase;
+
+@ The following set is built in to the Inform language; Basic Inform and such
+extensions cannot extend it.
+
+=
+/* Each of these stays |NULL| until |ControlStructures::create_standard| has run. */
+control_structure_phrase
+	*switch_CSP = NULL,
+	*if_CSP = NULL,
+	*repeat_CSP = NULL,
+	*while_CSP = NULL,
+	*otherwise_CSP = NULL,
+	*abbreviated_otherwise_CSP = NULL,
+	*otherwise_if_CSP = NULL,
+	*default_case_CSP = NULL,
+	*case_CSP = NULL,
+	*say_CSP = NULL,
+	*now_CSP = NULL,
+	*instead_CSP = NULL;
+
+@ The following functions attempt to contain information about the
+basic structural phrases in one place, so that if future loop constructs
+are added, they can fairly simply be put here.
+
+=
+control_structure_phrase *ControlStructures::new(void) {
+	/* Make a blank control structure: no parent, every flag off, no stage,
+	and a placeholder keyword. Callers then override whatever differs. */
+	control_structure_phrase *fresh = CREATE(control_structure_phrase);
+
+	fresh->keyword = L"<none>";
+	fresh->subordinate_to = NULL;
+	fresh->used_at_stage = -1;
+
+	fresh->is_a_loop = FALSE;
+	fresh->indent_subblocks = FALSE;
+	fresh->body_empty_except_for_subordinates = FALSE;
+	fresh->requires_new_syntax = FALSE;
+	fresh->allow_run_on = FALSE;
+
+	return fresh;
+}
+
+void ControlStructures::create_standard(void) {
+	/* Create the fixed set of control structures and point the |*_CSP| globals
+	at them. Each begins as the blank made by |ControlStructures::new|, so only
+	the fields which differ from that are set below. */
+
+	/* the switch form is written "if ... is begin", hence the keyword "if" */
+	switch_CSP = ControlStructures::new();
+	switch_CSP->body_empty_except_for_subordinates = TRUE;
+	switch_CSP->indent_subblocks = TRUE;
+	switch_CSP->requires_new_syntax = TRUE;
+	switch_CSP->keyword = L"if";
+
+	if_CSP = ControlStructures::new();
+	if_CSP->keyword = L"if";
+
+	/* the two loop constructs */
+	repeat_CSP = ControlStructures::new();
+	repeat_CSP->keyword = L"repeat";
+	repeat_CSP->is_a_loop = TRUE;
+
+	while_CSP = ControlStructures::new();
+	while_CSP->keyword = L"while";
+	while_CSP->is_a_loop = TRUE;
+
+	/* clauses subordinate to "if"; their keywords stay at the placeholder */
+	otherwise_CSP = ControlStructures::new();
+	otherwise_CSP->subordinate_to =	if_CSP;
+	otherwise_CSP->used_at_stage = 1;
+
+	abbreviated_otherwise_CSP = ControlStructures::new();
+	abbreviated_otherwise_CSP->subordinate_to =	if_CSP;
+	abbreviated_otherwise_CSP->used_at_stage = 1;
+
+	otherwise_if_CSP = ControlStructures::new();
+	otherwise_if_CSP->subordinate_to = if_CSP;
+	otherwise_if_CSP->used_at_stage = 0;
+
+	/* clauses subordinate to the switch form */
+	case_CSP = ControlStructures::new();
+	case_CSP->subordinate_to = switch_CSP;
+	case_CSP->used_at_stage = 1;
+	case_CSP->requires_new_syntax = TRUE;
+	case_CSP->allow_run_on = TRUE;
+
+	default_case_CSP = ControlStructures::new();
+	default_case_CSP->subordinate_to = switch_CSP;
+	default_case_CSP->used_at_stage = 2;
+	default_case_CSP->requires_new_syntax = TRUE;
+	default_case_CSP->allow_run_on = TRUE;
+
+	/* these three keep every default; |ControlStructures::opens_block|
+	explicitly excludes them from opening a block */
+	say_CSP = ControlStructures::new();
+
+	now_CSP = ControlStructures::new();
+
+	instead_CSP = ControlStructures::new();
+}
+
+void ControlStructures::log(control_structure_phrase *csp) {
+	/* Write a short tag for |csp| to the debugging log: "---" for |NULL|, or
+	else the abbreviation of whichever standard structure it is. Nothing is
+	written for a structure which is none of the standard ones.
+
+	|NULL| is handled first, and we stop at the first match, so that exactly
+	one tag is ever written. (Testing |NULL| last, with independent tests,
+	meant that a |NULL| argument matched every |*_CSP| global if this was
+	called before |ControlStructures::create_standard| had set them.) */
+	if (csp == NULL) { LOG("---"); return; }
+	if (csp == if_CSP) { LOG("IF"); return; }
+	if (csp == repeat_CSP) { LOG("RPT"); return; }
+	if (csp == while_CSP) { LOG("WHI"); return; }
+	if (csp == switch_CSP) { LOG("SWI"); return; }
+	if (csp == otherwise_CSP) { LOG("O"); return; }
+	if (csp == abbreviated_otherwise_CSP) { LOG("AO"); return; }
+	if (csp == otherwise_if_CSP) { LOG("OIF"); return; }
+	if (csp == case_CSP) { LOG("CAS"); return; }
+	if (csp == default_case_CSP) { LOG("DEF"); return; }
+	if (csp == say_CSP) { LOG("SAY"); return; }
+	if (csp == now_CSP) { LOG("NOW"); return; }
+	if (csp == instead_CSP) { LOG("INS"); return; }
+}
+
+int ControlStructures::comma_possible(control_structure_phrase *csp) {
+	/* |TRUE| exactly for "if", the switch form and "otherwise if". */
+	if (csp == if_CSP) return TRUE;
+	if (csp == switch_CSP) return TRUE;
+	if (csp == otherwise_if_CSP) return TRUE;
+	return FALSE;
+}
+
+int ControlStructures::is_a_loop(control_structure_phrase *csp) {
+	/* The structure's own loop flag; a |NULL| structure is not a loop. */
+	return (csp) ? (csp->is_a_loop) : FALSE;
+}
+
+int ControlStructures::opens_block(control_structure_phrase *csp) {
+	/* A structure opens a block if it exists, is not subordinate to another
+	structure, and is not one of "say", "now" or "instead". */
+	if (csp == NULL) return FALSE;
+	if (csp->subordinate_to != NULL) return FALSE;
+	if (csp == say_CSP) return FALSE;
+	if (csp == now_CSP) return FALSE;
+	if (csp == instead_CSP) return FALSE;
+	return TRUE;
+}
+
+int ControlStructures::permits_break(control_structure_phrase *csp) {
+	/* Only "repeat" and "while" answer |TRUE|. */
+	int breakable = FALSE;
+	if (csp == repeat_CSP) breakable = TRUE;
+	if (csp == while_CSP) breakable = TRUE;
+	return breakable;
+}
+
+wchar_t *ControlStructures::incipit(control_structure_phrase *csp) {
+	/* The structure's keyword, or the placeholder text if there is no structure. */
+	if (csp == NULL) return L"<none>";
+	return csp->keyword;
+}
+
+control_structure_phrase *ControlStructures::detect(wording W) {
+	/* Which control structure, if any, the wording |W| begins. An abbreviated
+	"otherwise" is deliberately reported as |NULL|, just like no match. */
+	control_structure_phrase *found = NULL;
+	if (<control-structure-phrase>(W)) {
+		found = <<rp>>;
+		if (found == abbreviated_otherwise_CSP) found = NULL;
+	}
+	return found;
+}
+
+int ControlStructures::abbreviated_otherwise(wording W) {
+	/* |TRUE| only if |W| matches <control-structure-phrase> and the match is
+	the abbreviated "otherwise" form. The result pointer is read only after a
+	successful match, since |&&| short-circuits. */
+	int abbreviated = FALSE;
+	if ((<control-structure-phrase>(W)) && (<<rp>> == abbreviated_otherwise_CSP))
+		abbreviated = TRUE;
+	return abbreviated;
+}
+
+control_structure_phrase *ControlStructures::detect_end(wording W) {
+	/* The structure which an "end ..." phrase closes, or |NULL| if |W| is not one. */
+	control_structure_phrase *closed = NULL;
+	if (<end-control-structure-phrase>(W)) closed = <<rp>>;
+	return closed;
+}
+
+@ Control structures such as "if" act as a sort of super-punctuation inside
+rule and phrase definitions, and in particular they affect the actual
+punctuation of the sentences there (consider the rules about colons versus
+semicolons). So, though it's still early in Inform's run, we need to seek
+them out.
+
+Here we parse the text of a command phrase to see which, if any, of the control
+structures it might be. Note that <s-command> has a grammar partially
+overlapping with this, and they need to match.
+
+@d NO_SIGF 0
+@d SAY_SIGF 1
+@d NOW_SIGF 2
+
+=
+<control-structure-phrase> ::=
+	if ... is begin |				==> 0; *XP = switch_CSP
+	if ... is |						==> 0; *XP = switch_CSP
+	if/unless ... |					==> 0; *XP = if_CSP
+	repeat ... |					==> 0; *XP = repeat_CSP
+	while ... |						==> 0; *XP = while_CSP
+	else/otherwise |				==> 0; *XP = otherwise_CSP
+	else/otherwise if/unless ... |	==> 0; *XP = otherwise_if_CSP
+	else/otherwise ... |			==> 0; *XP = abbreviated_otherwise_CSP
+	-- otherwise |					==> 0; *XP = default_case_CSP
+	-- ...							==> 0; *XP = case_CSP
+
+<end-control-structure-phrase> ::=
+	end if/unless |					==> 0; *XP = if_CSP
+	end while |						==> 0; *XP = while_CSP
+	end repeat						==> 0; *XP = repeat_CSP
+
+<other-significant-phrase> ::=
+	say ... |						==> SAY_SIGF
+	now ...							==> NOW_SIGF
+
+@ This is used to see if an "if" is being used with the comma notation:
+
+=
+<phrase-with-comma-notation> ::=
+	...... , ......
+
+@ This is used to see if a phrase begins or ends with the word "instead":
+
+=
+<instead-keyword> ::=
+	instead ... |
+	... instead
+
+@ Finally, this is used to see if a control structure opens a block:
+
+=
+<phrase-beginning-block> ::=
+	... begin
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
@ -26,7 +26,9 @@ source_file *SourceText::read_file(inbuild_copy *C, filename *F, text_stream *sy
 			Copies::attach(C, Copies::new_error_on_file(OPEN_FAILED_CE, F));
 		} else {
 			fclose(handle);
+			#ifdef CORE_MODULE
 			if (documentation_only == FALSE) @<Tell console output about the file@>;
+			#endif
 		}
 	}
 	currently_lexing_into = NULL;
@ -55,17 +57,6 @@ application.

@

-@d SENTENCE_COUNT_MONITOR SourceText::increase_sentence_count
-
-=
-wording options_file_wording = EMPTY_WORDING_INIT;
-int SourceText::increase_sentence_count(wording W) {
-	if (Wordings::within(W, options_file_wording) == FALSE) return TRUE;
-	return FALSE;
-}
-
-@
-
@d LEXER_PROBLEM_HANDLER SourceText::lexer_problem_handler

 =
@ -200,6 +191,25 @@ known to most Inform users: it increases output to the debugging log.)
@e TABLE_NT             			/* "Table 1 - Counties of England" */
@e EQUATION_NT          			/* "Equation 2 - Newton's Second Law" */
@e TRACE_NT             			/* A sentence consisting of an asterisk and optional quoted text */
+@e INVOCATION_LIST_NT   		    /* Single invocation of a (possibly compound) phrase */
+
+@d list_node_type ROUTINE_NT
+@d list_entry_node_type INVOCATION_LIST_NT
+
+@ =
+void SourceText::node_metadata(void) {
+	/* Register the metadata for the structural node types, in the order
+	listed. |INVOCATION_LIST_NT| is registered here only when the |core|
+	module is absent. */
+	parse_tree_node_type structural_types[] = {
+		{ BIBLIOGRAPHIC_NT, "BIBLIOGRAPHIC_NT", 0, 0, L2_NCAT, 0 },
+		{ ROUTINE_NT, "ROUTINE_NT", 0, INFTY, L2_NCAT, 0 },
+		{ INFORM6CODE_NT, "INFORM6CODE_NT", 0, 0, L2_NCAT, 0 },
+		{ TABLE_NT, "TABLE_NT", 0, 0, L2_NCAT, TABBED_CONTENT_NFLAG },
+		{ EQUATION_NT, "EQUATION_NT", 0, 0, L2_NCAT, 0 },
+		{ TRACE_NT, "TRACE_NT", 0, 0, L2_NCAT, 0 },
+	#ifndef CORE_MODULE
+		{ INVOCATION_LIST_NT, "INVOCATION_LIST_NT", 0, INFTY, L2_NCAT, 0 },
+	#endif
+	};
+	int no_types = (int) (sizeof(structural_types)/sizeof(structural_types[0]));
+	for (int i=0; i<no_types; i++) ParseTree::md(structural_types[i]);
+}
+
+@

 =
 <structural-sentence> ::=
--- a/inbuild/inbuild-module/Chapter
+++ b/inbuild/inbuild-module/Chapter
--- a/inbuild/inbuild-module/Contents.w
+++ b/inbuild/inbuild-module/Contents.w
@ -36,8 +36,6 @@ Chapter 4: Managing Genres of Work
 	Pipeline Manager

 Chapter 5: Services for the Inform Compiler
-	Source Text
-	Virtual Machine Grammar
 	Kit Services
 	Extension Services
 	Extension Dictionary
@ -47,3 +45,8 @@ Chapter 5: Services for the Inform Compiler
 	Project Services
 	Language Services
 	Pipeline Services
+
+Chapter 6: Handling Inform Source Text
+	Source Text
+	Control Structures
+	Virtual Machine Grammar
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -53,7 +53,6 @@ We need to itemise the structures we'll want to allocate:
@e ph_stack_frame_box_MT
@e i6_inclusion_matter_MT
@e literal_list_MT
-@e control_structure_phrase_MT
@e adjective_meaning_MT
@e adjective_meaning_block_MT
@e measurement_definition_MT
@ -109,7 +108,6 @@ ALLOCATE_INDIVIDUALLY(binary_predicate)
 ALLOCATE_INDIVIDUALLY(booking)
 ALLOCATE_INDIVIDUALLY(constant_phrase)
 ALLOCATE_INDIVIDUALLY(contents_entry)
-ALLOCATE_INDIVIDUALLY(control_structure_phrase)
 ALLOCATE_INDIVIDUALLY(counting_data)
 ALLOCATE_INDIVIDUALLY(definition)
 ALLOCATE_INDIVIDUALLY(dval_written)
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -13,9 +13,8 @@ in order of when they work:

@e STARTED_CSEQ from 0
@e LEXICAL_CSEQ
-@e SEMANTIC_IA_CSEQ
@e SEMANTIC_LANGUAGE_CSEQ
-@e SEMANTIC_IB_CSEQ
+@e SEMANTIC_I_CSEQ
@e SEMANTIC_II_CSEQ
@e SEMANTIC_III_CSEQ
@e ASSERTIONS_PASS_1_CSEQ
@ -47,8 +46,7 @@ int Sequence::carry_out(int debugging) {
 		@<Boot up the compiler@>;
 		compiler_booted_up = TRUE;
 	}
-	@<Perform lexical analysis@>;
-	@<Perform semantic analysis@>;
+	@<Perform textual analysis@>;
 	@<Read the assertions in two passes@>;
 	@<Make the model world@>;
 	@<Tables and grammar@>;
@ -107,13 +105,9 @@ most of these worker functions are in the |core| module, some are not.
 	BENCH(InferenceSubjects::begin);
 	BENCH(Index::DocReferences::read_xrefs);

-@<Perform lexical analysis@> =
-	Task::advance_stage_to(LEXICAL_CSEQ, I"Lexical analysis", 0);
-	BENCH(Sentences::RuleSubtrees::create_standard_csps)
+@<Perform textual analysis@> =
+	Task::advance_stage_to(LEXICAL_CSEQ, I"Textual analysis", 0);
 	BENCH(Task::read_source_text)
-
-@<Perform semantic analysis@> =
-	Task::advance_stage_to(SEMANTIC_IA_CSEQ, I"Semantic analysis Ia", 1);
 	BENCH(Task::activate_language_elements)
 	BENCH(Extensions::Inclusion::traverse)
 	BENCH(Sentences::Headings::satisfy_dependencies)
@ -124,7 +118,7 @@ most of these worker functions are in the |core| module, some are not.
 	BENCH(BinaryPredicates::make_built_in)
 	BENCH(NewVerbs::add_inequalities)

-	Task::advance_stage_to(SEMANTIC_IB_CSEQ, I"Semantic analysis Ib", -1);
+	Task::advance_stage_to(SEMANTIC_I_CSEQ, I"Semantic analysis Ib", -1);
 	BENCH(Sentences::VPs::traverse)
 	BENCH(Sentences::Rearrangement::tidy_up_ofs_and_froms)
 	BENCH(Sentences::RuleSubtrees::register_recently_lexed_phrases)
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -113,6 +113,11 @@ target_vm *Task::vm(void) {
 	return inform7_task->task->for_vm;
 }

+inbuild_edition *Task::edition(void) {
+	/* The edition of the copy belonging to the current task's project; it is
+	an internal error to ask when there is no current task. */
+	if (!inform7_task) internal_error("there is no current task");
+	inbuild_copy *C = inform7_task->project->as_copy;
+	return C->edition;
+}
+
@ Resources in a Blorb file have unique ID numbers which are positive integers,
 but these are not required to start from 1, nor to be contiguous. For Inform,
 ID number 1 is reserved for the cover image (whether or not any cover image
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -260,6 +260,12 @@ void SourceProblems::issue_problems_arising(inbuild_copy *C) {
 							ExtensionManager::from_copy(C),
 							"has an 'ends here' with nothing having begun");
 						break;
+					case BadTitleSentence_SYNERROR:
+						current_sentence = CE->details_node;
+						Problems::Issue::sentence_problem(_p_(PM_BadTitleSentence),
+							"the initial bibliographic sentence can only be a title in double-quotes",
+							"possibly followed with 'by' and the name of the author.");
+						break;
 					default:
 						internal_error("unknown syntax error");
 				}
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -396,7 +396,7 @@ void Phrases::TypeData::Textual::parse(ph_type_data *phtd, wording XW, wording *

@<Does this comma presage phrase options?@> =
 	if ((<control-structure-phrase>(XW)) &&
-		(Sentences::RuleSubtrees::comma_possible(<<rp>>)))
+		(ControlStructures::comma_possible(<<rp>>)))
 		comma_presages_options = FALSE;

@ If you find the explanation in this message unconvincing, you're not alone.
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -205,7 +205,7 @@ Are we in the body of a loop, perhaps indirectly?
 int Frames::Blocks::inside_a_loop_body(void) {
 	int i;
 	for (i = current_block_stack.pb_sp-1; i >= 0; i--)
-		if (Sentences::RuleSubtrees::is_a_loop(current_block_stack.pb_stack[i].from_structure))
+		if (ControlStructures::is_a_loop(current_block_stack.pb_stack[i].from_structure))
 			return TRUE;
 	return FALSE;
 }
@ -224,7 +224,7 @@ int Frames::Blocks::current_block_level(void) {

 wchar_t *Frames::Blocks::name_of_current_block(void) {
 	if (block_being_compiled == NULL) return NULL;
-	return Sentences::RuleSubtrees::incipit(block_being_compiled->from_structure);
+	return ControlStructures::incipit(block_being_compiled->from_structure);
 }

 parse_node *Frames::Blocks::start_of_current_block(void) {
@ -248,7 +248,7 @@ So we do this by hand, jumping to a label placed just after the loop ends.
 int unique_breakage_count = 0;
 void Frames::Blocks::emit_break(void) {
 	for (int i = current_block_stack.pb_sp-1; i >= 0; i--)
-		if (Sentences::RuleSubtrees::permits_break(current_block_stack.pb_stack[i].from_structure)) {
+		if (ControlStructures::permits_break(current_block_stack.pb_stack[i].from_structure)) {
 			if (current_block_stack.pb_stack[i].label_following == -1)
 				current_block_stack.pb_stack[i].label_following =
 					unique_breakage_count++;
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -150,7 +150,7 @@ int Routines::Compile::code_block(int statement_count, parse_node *pn, int top_l
 int Routines::Compile::code_line(int statement_count, parse_node *p) {
 	control_structure_phrase *csp = ParseTree::get_control_structure_used(p);
 	parse_node *to_compile = p;
-	if (Sentences::RuleSubtrees::opens_block(csp)) {
+	if (ControlStructures::opens_block(csp)) {
 		Frames::Blocks::beginning_block_phrase(csp);
 		to_compile = p->down;
 	}
@ -509,7 +509,7 @@ henceforth to be true, so we simply compile empty code in that case.
 	else if (csp == switch_CSP) @<Compile a switch tail@>
 	else if (csp == say_CSP) @<Compile a say tail@>
 	else if (csp == instead_CSP) @<Compile an instead tail@>
-	else if (Sentences::RuleSubtrees::opens_block(csp)) @<Compile a loop tail@>;
+	else if (ControlStructures::opens_block(csp)) @<Compile a loop tail@>;

@<Compile an if tail@> =
 	;
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -146,7 +146,6 @@ void ParseTreeUsage::copy_annotations(parse_node_annotation *to, parse_node_anno
@e TOKEN_NT             			/* Used for tokens in grammar */

@e CODE_BLOCK_NT       			/* Holds a block of source material */
-@e INVOCATION_LIST_NT   		    /* Single invocation of a (possibly compound) phrase */
@e INVOCATION_LIST_SAY_NT		/* Single thing to be said */
@e INVOCATION_NT      			/* Usage of a phrase */
@e VOID_CONTEXT_NT  				/* When a void phrase is required */
@ -331,12 +330,7 @@ goes. The annotations used are identified by nonzero ID numbers, as follows:
 =
 void ParseTreeUsage::md(void) {
    /* first, the structural nodes: */
-	ParseTree::md((parse_tree_node_type) { BIBLIOGRAPHIC_NT, "BIBLIOGRAPHIC_NT",    					0, 0,		L2_NCAT, 0 });
-	ParseTree::md((parse_tree_node_type) { ROUTINE_NT, "ROUTINE_NT", 			   					0, INFTY,	L2_NCAT, 0 });
-	ParseTree::md((parse_tree_node_type) { INFORM6CODE_NT, "INFORM6CODE_NT",		   					0, 0,		L2_NCAT, 0 });
-	ParseTree::md((parse_tree_node_type) { TABLE_NT, "TABLE_NT",					   					0, 0,		L2_NCAT, TABBED_CONTENT_NFLAG });
-	ParseTree::md((parse_tree_node_type) { EQUATION_NT, "EQUATION_NT",			   					0, 0,		L2_NCAT, 0 });
-	ParseTree::md((parse_tree_node_type) { TRACE_NT, "TRACE_NT",					   					0, 0,		L2_NCAT, 0 });
+	SourceText::node_metadata();

 	ParseTree::md((parse_tree_node_type) { ALLOWED_NT, "ALLOWED_NT",				   					1, 1,		L3_NCAT, ASSERT_NFLAG });
 	ParseTree::md((parse_tree_node_type) { EVERY_NT, "EVERY_NT", 				   					0, INFTY,	L3_NCAT, ASSERT_NFLAG });
@ -697,7 +691,7 @@ void ParseTreeUsage::log_node(OUTPUT_STREAM, parse_node *pn) {
 		case INVOCATION_LIST_NT:
 		case CODE_BLOCK_NT: {
 			control_structure_phrase *csp = ParseTree::get_control_structure_used(pn);
-			WRITE("  "); Sentences::RuleSubtrees::log_control_structure(csp); WRITE(" ");
+			WRITE("  "); ControlStructures::log(csp); WRITE(" ");
 			if (pn->node_type == INVOCATION_LIST_NT)
 				WRITE("%d", ParseTree::int_annotation(pn, indentation_level_ANNOT));
 			else WRITE(" ");
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -11,41 +11,8 @@ simply listed after it in the parse tree, but we want them to become its
 children: this is the only thing the $A$-grammar does with rules, which
 otherwise wait until later to be dealt with.

-The single routine in this section accomplishes the regrouping: after it
-runs, every |INVOCATION_LIST_NT| is a child of the |ROUTINE_NT| header to which it
-belongs.
-
-@ Certain phrases are "structural": otherwise, if, repeat, while and so
-on. These have different expectations in terms of the layout of surrounding
-phrases in rule or phrase definitions, and the following structure defines
-the relevant behaviour. (The contents are static.)
-
-=
-typedef struct control_structure_phrase {
-	struct control_structure_phrase *subordinate_to;
-	int indent_subblocks;
-	int body_empty_except_for_subordinates;
-	int used_at_stage;
-	int is_a_loop;
-	int requires_new_syntax;
-	int allow_run_on;
-	wchar_t *keyword;
-	MEMORY_MANAGEMENT
-} control_structure_phrase;
-
-control_structure_phrase
-	*switch_CSP = NULL,
-	*if_CSP = NULL,
-	*repeat_CSP = NULL,
-	*while_CSP = NULL,
-	*otherwise_CSP = NULL,
-	*abbreviated_otherwise_CSP = NULL,
-	*otherwise_if_CSP = NULL,
-	*default_case_CSP = NULL,
-	*case_CSP = NULL,
-	*say_CSP = NULL,
-	*now_CSP = NULL,
-	*instead_CSP = NULL;
+The code in this section accomplishes the regrouping: after it runs, every
+|INVOCATION_LIST_NT| is a child of the |ROUTINE_NT| header to which it belongs.

@ This routine is used whenever new material is added. Whenever it finds a
 childless |ROUTINE_NT| followed by a sequence of |INVOCATION_LIST_NT| nodes, it
@ -148,7 +115,7 @@ void Sentences::RuleSubtrees::parse_routine_structure(parse_node *routine_node)
 	parse_node *p;
 	for (p = routine_node->down; p; p = p->next) {
 		control_structure_phrase *csp =
-			Sentences::RuleSubtrees::detect_control_structure(ParseTree::get_text(p));
+			ControlStructures::detect(ParseTree::get_text(p));
 		if (csp) {
 			int syntax_used = ParseTree::int_annotation(p, colon_block_command_ANNOT);
 			if (syntax_used == FALSE) { /* i.e., doesn't end with a colon */
@ -167,7 +134,7 @@ void Sentences::RuleSubtrees::parse_routine_structure(parse_node *routine_node)
 			if ((csp->requires_new_syntax) && (requires_colon_syntax == NULL))
 				requires_colon_syntax = p;
 		}
-		if (Sentences::RuleSubtrees::detect_end_control_structure(ParseTree::get_text(p))) {
+		if (ControlStructures::detect_end(ParseTree::get_text(p))) {
 			if (uses_begin_end_syntax == NULL)
 				uses_begin_end_syntax = p;
 		}
@ -258,7 +225,7 @@ more certainly, and similarly for "end X" phrases.
@<(b.2) Annotate the parse tree with control structure usage@> =
 	for (parse_node *p = routine_node->down; p; p = p->next) {
 		control_structure_phrase *csp;
-		csp = Sentences::RuleSubtrees::detect_control_structure(ParseTree::get_text(p));
+		csp = ControlStructures::detect(ParseTree::get_text(p));
 		if (csp) {
 			if ((ParseTree::int_annotation(p, colon_block_command_ANNOT)) ||
 				(<phrase-beginning-block>(ParseTree::get_text(p))) ||
@ -267,7 +234,7 @@ more certainly, and similarly for "end X" phrases.
 				if (csp == case_CSP) @<Trim a switch case to just the case value@>;
 			}
 		}
-		csp = Sentences::RuleSubtrees::detect_end_control_structure(ParseTree::get_text(p));
+		csp = ControlStructures::detect_end(ParseTree::get_text(p));
 		if (csp) ParseTree::set_end_control_structure_used(p, csp);
 	}

@ -289,7 +256,7 @@ to break this up.
 	for (parse_node *p = routine_node->down; p; p = p->next)
 		if (ParseTree::get_control_structure_used(p) == NULL) {
 			control_structure_phrase *csp;
-			csp = Sentences::RuleSubtrees::detect_control_structure(ParseTree::get_text(p));
+			csp = ControlStructures::detect(ParseTree::get_text(p));
 			if ((csp == if_CSP) && (<phrase-with-comma-notation>(ParseTree::get_text(p))))
 				@<Effect a comma expansion@>;
 		}
@ -331,7 +298,7 @@ to break this up.
 				ParseTree::int_annotation(rest_of_routine, indentation_level_ANNOT))) {
 			if (ParseTree::get_control_structure_used(rest_of_routine) == otherwise_CSP)
 				@<Deal with an immediately following otherwise@>
-			else if (Sentences::RuleSubtrees::abbreviated_otherwise(ParseTree::get_text(rest_of_routine)))
+			else if (ControlStructures::abbreviated_otherwise(ParseTree::get_text(rest_of_routine)))
 				@<Deal with an abbreviated otherwise node@>;
 		}

@ -744,7 +711,7 @@ whichever syntax is used. We finally make a meaningful tree out of it.
 		csp = ParseTree::get_control_structure_used(pn);
 		if (csp) {
 			go_down = TRUE;
-			if (Sentences::RuleSubtrees::opens_block(csp) == FALSE) {
+			if (ControlStructures::opens_block(csp) == FALSE) {
 				go_up = TRUE;
 				ParseTree::set_type(pn, CODE_BLOCK_NT);
 			}
@ -818,7 +785,7 @@ void Sentences::RuleSubtrees::police_code_block(parse_node *block, control_struc

 		csp = ParseTree::get_control_structure_used(p);
 		if (csp) {
-			if (Sentences::RuleSubtrees::opens_block(csp)) {
+			if (ControlStructures::opens_block(csp)) {
 				if ((p->next == NULL) ||
 					(ParseTree::get_end_control_structure_used(p->next) == NULL))
 					@<Issue problem for begin without end@>;
@ -1033,7 +1000,7 @@ annotations to them.
@ =
 void Sentences::RuleSubtrees::insert_cb_nodes(parse_node *block) {
 	for (parse_node *p = block->down, *prev_p = NULL; p; prev_p = p, p = p->next) {
-		if (Sentences::RuleSubtrees::opens_block(ParseTree::get_control_structure_used(p))) {
+		if (ControlStructures::opens_block(ParseTree::get_control_structure_used(p))) {
 			parse_node *blank_cb_node = ParseTree::new(CODE_BLOCK_NT);
 			ParseTree::set_control_structure_used(blank_cb_node,
 				ParseTree::get_control_structure_used(p));
@ -1261,188 +1228,3 @@ parse_node *Sentences::RuleSubtrees::end_node(parse_node *opening) {
 		ParseTree::int_annotation(opening, indentation_level_ANNOT));
 	return implicit_end;
 }
-
-@h Basic Structural Syntax.
-The following routine is an attempt to contain information about the
-basic structural phrases in one place, so that if future loop constructs
-are added, they can fairly simply be put here.
-
-=
-control_structure_phrase *Sentences::RuleSubtrees::csp_new(void) {
-	control_structure_phrase *csp = CREATE(control_structure_phrase);
-	csp->subordinate_to = NULL;
-	csp->indent_subblocks = FALSE;
-	csp->body_empty_except_for_subordinates = FALSE;
-	csp->used_at_stage = -1;
-	csp->requires_new_syntax = FALSE;
-	csp->allow_run_on = FALSE;
-	csp->keyword = L"<none>";
-	csp->is_a_loop = FALSE;
-	return csp;
-}
-
-void Sentences::RuleSubtrees::create_standard_csps(void) {
-	switch_CSP = Sentences::RuleSubtrees::csp_new();
-	switch_CSP->body_empty_except_for_subordinates = TRUE;
-	switch_CSP->indent_subblocks = TRUE;
-	switch_CSP->requires_new_syntax = TRUE;
-	switch_CSP->keyword = L"if";
-
-	if_CSP = Sentences::RuleSubtrees::csp_new();
-	if_CSP->keyword = L"if";
-
-	repeat_CSP = Sentences::RuleSubtrees::csp_new();
-	repeat_CSP->keyword = L"repeat";
-	repeat_CSP->is_a_loop = TRUE;
-
-	while_CSP = Sentences::RuleSubtrees::csp_new();
-	while_CSP->keyword = L"while";
-	while_CSP->is_a_loop = TRUE;
-
-	otherwise_CSP = Sentences::RuleSubtrees::csp_new();
-	otherwise_CSP->subordinate_to =	if_CSP;
-	otherwise_CSP->used_at_stage = 1;
-
-	abbreviated_otherwise_CSP = Sentences::RuleSubtrees::csp_new();
-	abbreviated_otherwise_CSP->subordinate_to =	if_CSP;
-	abbreviated_otherwise_CSP->used_at_stage = 1;
-
-	otherwise_if_CSP = Sentences::RuleSubtrees::csp_new();
-	otherwise_if_CSP->subordinate_to = if_CSP;
-	otherwise_if_CSP->used_at_stage = 0;
-
-	case_CSP = Sentences::RuleSubtrees::csp_new();
-	case_CSP->subordinate_to = switch_CSP;
-	case_CSP->used_at_stage = 1;
-	case_CSP->requires_new_syntax = TRUE;
-	case_CSP->allow_run_on = TRUE;
-
-	default_case_CSP = Sentences::RuleSubtrees::csp_new();
-	default_case_CSP->subordinate_to = switch_CSP;
-	default_case_CSP->used_at_stage = 2;
-	default_case_CSP->requires_new_syntax = TRUE;
-	default_case_CSP->allow_run_on = TRUE;
-
-	say_CSP = Sentences::RuleSubtrees::csp_new();
-
-	now_CSP = Sentences::RuleSubtrees::csp_new();
-
-	instead_CSP = Sentences::RuleSubtrees::csp_new();
-}
-
-void Sentences::RuleSubtrees::log_control_structure(control_structure_phrase *csp) {
-	if (csp == if_CSP) LOG("IF");
-	if (csp == repeat_CSP) LOG("RPT");
-	if (csp == while_CSP) LOG("WHI");
-	if (csp == switch_CSP) LOG("SWI");
-	if (csp == otherwise_CSP) LOG("O");
-	if (csp == abbreviated_otherwise_CSP) LOG("AO");
-	if (csp == otherwise_if_CSP) LOG("OIF");
-	if (csp == case_CSP) LOG("CAS");
-	if (csp == default_case_CSP) LOG("DEF");
-	if (csp == say_CSP) LOG("SAY");
-	if (csp == now_CSP) LOG("NOW");
-	if (csp == instead_CSP) LOG("INS");
-	if (csp == NULL) LOG("---");
-}
-
-int Sentences::RuleSubtrees::comma_possible(control_structure_phrase *csp) {
-	if ((csp == if_CSP) || (csp == switch_CSP) || (csp == otherwise_if_CSP))
-		return TRUE;
-	return FALSE;
-}
-
-int Sentences::RuleSubtrees::is_a_loop(control_structure_phrase *csp) {
-	if (csp) return csp->is_a_loop;
-	return FALSE;
-}
-
-int Sentences::RuleSubtrees::opens_block(control_structure_phrase *csp) {
-	if ((csp) && (csp->subordinate_to == NULL) && (csp != say_CSP) && (csp != now_CSP) && (csp != instead_CSP)) return TRUE;
-	return FALSE;
-}
-
-int Sentences::RuleSubtrees::permits_break(control_structure_phrase *csp) {
-	if ((csp == repeat_CSP) || (csp == while_CSP)) return TRUE;
-	return FALSE;
-}
-
-wchar_t *Sentences::RuleSubtrees::incipit(control_structure_phrase *csp) {
-	if (csp) return csp->keyword;
-	return L"<none>";
-}
-
-control_structure_phrase *Sentences::RuleSubtrees::detect_control_structure(wording W) {
-	if (<control-structure-phrase>(W)) {
-		if (<<rp>> == abbreviated_otherwise_CSP) return NULL;
-		return <<rp>>;
-	}
-	return NULL;
-}
-
-int Sentences::RuleSubtrees::abbreviated_otherwise(wording W) {
-	if (<control-structure-phrase>(W)) {
-		if (<<rp>> == abbreviated_otherwise_CSP) return TRUE;
-	}
-	return FALSE;
-}
-
-control_structure_phrase *Sentences::RuleSubtrees::detect_end_control_structure(wording W) {
-	if (<end-control-structure-phrase>(W)) return <<rp>>;
-	return NULL;
-}
-
-@ Control structures such as "if" act as a sort of super-punctuation inside
-rule and phrase definitions, and in particular they affect the actual
-punctuation of the sentences there (consider the rules about colons versus
-semicolons). So, though it's still early in Inform's run, we need to seek
-them out.
-
-Here we parse the text of a command phrase which, if any, of the control
-structures it might be. Note that <s-command> has a grammar partially
-overlapping with this, and they need to match.
-
-@d NO_SIGF 0
-@d SAY_SIGF 1
-@d NOW_SIGF 2
-
-=
-<control-structure-phrase> ::=
-	if ... is begin |				==> 0; *XP = switch_CSP
-	if ... is |						==> 0; *XP = switch_CSP
-	if/unless ... |					==> 0; *XP = if_CSP
-	repeat ... |					==> 0; *XP = repeat_CSP
-	while ... |						==> 0; *XP = while_CSP
-	else/otherwise |				==> 0; *XP = otherwise_CSP
-	else/otherwise if/unless ... |	==> 0; *XP = otherwise_if_CSP
-	else/otherwise ... |			==> 0; *XP = abbreviated_otherwise_CSP
-	-- otherwise |					==> 0; *XP = default_case_CSP
-	-- ...							==> 0; *XP = case_CSP
-
-<end-control-structure-phrase> ::=
-	end if/unless |					==> 0; *XP = if_CSP
-	end while |						==> 0; *XP = while_CSP
-	end repeat						==> 0; *XP = repeat_CSP
-
-<other-significant-phrase> ::=
-	say ... |						==> SAY_SIGF
-	now ...							==> NOW_SIGF
-
-@ This is used to see if an "if" is being used with the comma notation:
-
-=
-<phrase-with-comma-notation> ::=
-	...... , ......
-
-@ This is used to see if an "if" is being used with the comma notation:
-
-=
-<instead-keyword> ::=
-	instead ... |
-	... instead
-
-@ Finally, this is used to see if a control structure opens a block:
-
-=
-<phrase-beginning-block> ::=
-	... begin
--- a/inform7/core-module/Chapter
+++ b/inform7/core-module/Chapter
@ -4,8 +4,6 @@ To parse structurally important sentences.

@

-@d list_node_type ROUTINE_NT
-@d list_entry_node_type INVOCATION_LIST_NT

@h Sentence division.
 Sentence division can happen either early in Inform's run, when the vast bulk
--- a/inform7/if-module/Chapter
+++ b/inform7/if-module/Chapter
@ -78,84 +78,36 @@ int PL::Bibliographic::bibliographic_new_variable_notify(nonlocal_variable *q) {
 	return FALSE;
 }

-@ This is what the top line of the main source text should look like, if it's
-to declare the title and author.
-
-=
-<titling-line> ::=
-	<plain-titling-line> ( in <natural-language> ) |	==> R[1]; *XP = RP[2];
-	<plain-titling-line>								==> R[1]; *XP = NULL;
-
-<plain-titling-line> ::=
-	{<quoted-text-without-subs>} by ... |	==> TRUE
-	{<quoted-text-without-subs>}			==> FALSE
-
-@ That grammar is used at two points: first, to spot the natural language
-being used in the source text, something we have to look ahead to since
-it affects the grammar needed to understand the rest of the file --
-
-=
-inform_language *PL::Bibliographic::scan_language(parse_node *PN) {
-	if (<titling-line>(ParseTree::get_text(PN))) return <<rp>>;
-	return NULL;
-}
-
-@ -- and secondly, to parse the titling-line sentence in the regular way,
-setting bibliographic variables as needed. The following is called on the
-first sentence in the source text if and only if it begins with text in
-double quotes:
+@ The following is called in response to the bibliographic sentence. That in
+fact has already been parsed by Inbuild, so we simply extract the results,
+and set the "story title" and "story author" variables accordingly.

 =
 void PL::Bibliographic::bibliographic_data(parse_node *PN) {
-	if (<titling-line>(ParseTree::get_text(PN))) {
-		wording TW = GET_RW(<plain-titling-line>, 1);
-		wording AW = EMPTY_WORDING;
-		if (<<r>>) AW = GET_RW(<plain-titling-line>, 2);
-		if ((story_title_VAR) && (story_author_VAR)) {
-			@<Set the story title from the titling line@>;
-			if (Wordings::nonempty(AW)) @<Set the author from the titling line@>;
-		}
-	} else {
-		Problems::Issue::sentence_problem(_p_(PM_BadTitleSentence),
-			"the initial bibliographic sentence can only be a title in double-quotes",
-			"possibly followed with 'by' and the name of the author.");
+	inbuild_edition *edn = Task::edition(); /* title and author are read from this edition's work */
+	TEMPORARY_TEXT(T);
+	TEMPORARY_TEXT(A);
+	WRITE_TO(T, "\"%S\" ", edn->work->title); /* wrap in double-quotes so that it reads as a text literal */
+	WRITE_TO(A, "\"%S\" ", edn->work->author_name);
+	wording TW = Feeds::feed_stream(T);
+	wording AW = Feeds::feed_stream(A);
+	DISCARD_TEXT(T);
+	DISCARD_TEXT(A);
+
+	if ((story_title_VAR) && (story_author_VAR)) { /* do nothing unless both variables exist */
+		parse_node *the_title;
+		if (<s-value>(TW)) the_title = <<rp>>;
+		else the_title = Specifications::new_UNKNOWN(TW);
+		Assertions::PropertyKnowledge::initialise_global_variable(story_title_VAR, the_title);
+		Strings::TextLiterals::suppress_quote_expansion(ParseTree::get_text(the_title));
+
+		if (Str::len(edn->work->author_name) > 0) { /* as before, set no author if none was given */
+			parse_node *the_author = (<s-value>(AW)) ? <<rp>> : Specifications::new_UNKNOWN(AW);
+			Assertions::PropertyKnowledge::initialise_global_variable(story_author_VAR, the_author);
+		}
 	}
 }

-@ We must not of course simply write to the variables; we call the assertion
-machinery to generate an inference about their values, because that ensures
-that contradictions, and so forth, are properly complained about.
-
-@<Set the story title from the titling line@> =
-	parse_node *the_title;
-	if (<s-value>(TW)) the_title = <<rp>>;
-	else the_title = Specifications::new_UNKNOWN(TW);
-	Assertions::PropertyKnowledge::initialise_global_variable(story_title_VAR, the_title);
-	Strings::TextLiterals::suppress_quote_expansion(ParseTree::get_text(the_title));
-
-@ The author is often given outside of quotation marks:
-
->> "The Large Scale Structure of Space-Time" by Lindsay Lohan
-
-and in such cases we transcribe the name-words into quotes so that we can
-treat them as a text literal ("Lindsay Lohan").
-
-@<Set the author from the titling line@> =
-	TEMPORARY_TEXT(author_buffer);
-	if (<quoted-text>(AW) == FALSE) {
-		WRITE_TO(author_buffer, "\"%+W", AW);
-		for (int i=1, L=Str::len(author_buffer); i<L; i++)
-			if (Str::get_at(author_buffer, i) == '\"')
-				Str::put_at(author_buffer, i, '\'');
-		WRITE_TO(author_buffer, "\" ");
-		AW = Feeds::feed_stream(author_buffer);
-	}
-	DISCARD_TEXT(author_buffer);
-	parse_node *the_author;
-	if (<s-value>(AW)) the_author = <<rp>>;
-	else the_author = Specifications::new_UNKNOWN(AW);
-	Assertions::PropertyKnowledge::initialise_global_variable(story_author_VAR, the_author);
-
@ This unattractive routine performs a string comparison of the author's name
 against one that's supplied, case sensitively, and is used when deciding
 whether to print credits at run-time for extensions written by the same
--- a/inform7/kinds-test/Chapter
+++ b/inform7/kinds-test/Chapter
@ -67,7 +67,7 @@ void Unit::test_kinds(text_stream *arg) {
 	if (sf == NULL) { PRINT("File has failed to open\n"); return; }
 	ParseTree::plant_parse_tree();
 	PRINT("Read %d words\n", Wordings::length(W));
-	Sentences::break(W, FALSE, NULL);
+	Sentences::break(W, FALSE, NULL, -1);
 	ParseTree::traverse(Unit::diagram);

 	text_stream *save_DL = DL;
--- a/inform7/linguistics-test/Chapter
+++ b/inform7/linguistics-test/Chapter
@ -70,7 +70,7 @@ void Unit::test_diagrams(text_stream *arg) {
 	if (sf == NULL) { PRINT("File has failed to open\n"); return; }
 	ParseTree::plant_parse_tree();
 	PRINT("Read %d words\n", Wordings::length(W));
-	Sentences::break(W, FALSE, NULL);
+	Sentences::break(W, FALSE, NULL, -1);

 	text_stream *save_DL = DL;
 	DL = STDOUT;
--- a/inform7/problems-module/Chapter
+++ b/inform7/problems-module/Chapter
@ -490,7 +490,7 @@ void Problems::Issue::diagnose_further(void) {
 			"be in double quotes, \"like this\" and not 'like this'.)");

 	control_structure_phrase *csp =
-		Sentences::RuleSubtrees::detect_control_structure(ParseTree::get_text(current_sentence));
+		ControlStructures::detect(ParseTree::get_text(current_sentence));
 	if (csp)
 		Problems::issue_problem_segment(
 			" %P(The way this sentence starts makes me think it might have been "
--- a/inform7/problems-test/Chapter
+++ b/inform7/problems-test/Chapter
@ -37,7 +37,7 @@ void Unit::test_problems(text_stream *arg) {
 	if (sf == NULL) { PRINT("File has failed to open\n"); return; }
 	ParseTree::plant_parse_tree();
 	PRINT("Read %d words\n", Wordings::length(W));
-	Sentences::break(W, FALSE, NULL);
+	Sentences::break(W, FALSE, NULL, -1);

 	ParseTree::traverse(Unit::scan_tree);
 }
--- a/inform7/syntax-module/Chapter
+++ b/inform7/syntax-module/Chapter
@ -78,7 +78,7 @@ Structural node types are enumerated first:

@d BASE_OF_ENUMERATED_NTS    		0x80000000

-@e INVALID_NT from 0x80000000    /* No node with this node should ever exist */
+@e INVALID_NT from 0x80000000    /* No node with this node type should ever exist */

@e ROOT_NT              			/* Only one such node exists: the tree root */
@e INCLUSION_NT         			/* Holds a block of source material */
--- a/inform7/syntax-module/Chapter
+++ b/inform7/syntax-module/Chapter
@ -9,7 +9,6 @@ English sentences, and join each to the parse tree.
 position.

 =
-int no_sentences_read = 0;
 int sfsm_extension_position = 0; /* 0: not an extension; 1: before "begins here"; 2: before "ends here"; 3: after */
 node_type_t ssnt = 0;

@ -57,6 +56,7 @@ source_file *sfsm_source_file = NULL;
 int sfsm_inside_rule_mode = FALSE;
 int sfsm_skipping_material_at_level = -1;
 int sfsm_in_tabbed_mode = FALSE;
+int sfsm_main_source_start_wn = -1;
 EXTENSION_FILE_TYPE *sfsm_extension = NULL;

@ Now for the routine itself. We break into bite-sized chunks, each of which is
@ -65,7 +65,12 @@ which was used to end it. Each call to this routine represents one cycle of our
 finite state machine.

 =
-void Sentences::break(wording W, int is_extension, EXTENSION_FILE_TYPE *from_extension) {
+void Sentences::break(wording W, int is_extension,
+	EXTENSION_FILE_TYPE *from_extension, int bwc) {
+	while (((Wordings::nonempty(W))) && (compare_word(Wordings::first_wn(W), PARBREAK_V)))
+		W = Wordings::trim_first_word(W);
+	if (Wordings::empty(W)) return;
+
 	int sentence_start = Wordings::first_wn(W);
 	ParseTree::enable_last_sentence_cacheing();

@ -112,6 +117,7 @@ that is why these are global variables rather than locals in |Sentences::break|.
 	sfsm_extension = from_extension;
 	if (is_extension) sfsm_extension_position = 1;
 	else sfsm_extension_position = 0;
+	sfsm_main_source_start_wn = bwc;

@ A table is any sentence beginning with the word "Table". (Bad news for
 anyone writing "Table Mountain is a room.", of course, but there are other
@ -289,12 +295,6 @@ void Sentences::make_node(wording W, int stop_character) {

 	if (Wordings::empty(W)) internal_error("empty sentence generated");

-	#ifdef SENTENCE_COUNT_MONITOR
-	if (SENTENCE_COUNT_MONITOR(W)) no_sentences_read++;
-	#else
-	no_sentences_read++;
-	#endif
-
 	Vocabulary::identify_word_range(W); /* a precaution to catch any late unidentified text */

 	@<Detect a change of source file, and declare it as an implicit heading@>;
@ -446,20 +446,17 @@ substitutions. For instance,
 >> "A Dream of Fair to Middling Women" by Samuel Beckett

 This sentence is at the position matched by <if-start-of-source-text>.
-(Strictly speaking it's the second sentence read, not the first, because all
-source texts implicitly begin with an inclusion of the Standard Rules.)
+(It may not be the first sentence read, because implied extension inclusion
+sentences and options-file sentences may have been read already.)

 =
 <if-start-of-source-text> internal 0 {
 	int w1 = Wordings::first_wn(W);
-	#ifdef CORE_MODULE
-	int N = 1 + Projects::number_of_early_fed_sentences(Inbuild::project());
-	#endif
-	#ifndef CORE_MODULE
-	int N = 3;
-	#endif
-	if ((no_sentences_read == N) &&
-		((w1 == 0) || (compare_word(w1-1, PARBREAK_V)))) return TRUE;
+	while (w1 >= 0) { /* step back over any paragraph breaks just before this sentence */
+		if (w1 == sfsm_main_source_start_wn) return TRUE;
+		if ((w1 == 0) || (compare_word(w1-1, PARBREAK_V) == FALSE)) return FALSE; /* never read word -1 */
+		w1--;
+	}
 	return FALSE;
 }

@ -489,10 +486,9 @@ source texts implicitly begin with an inclusion of the Standard Rules.)
 				else if (language_extension_inclusion_point == NULL) language_extension_inclusion_point = new;
 			}
 			ParseTree::set_type(new, ssnt);
-			#ifdef IF_MODULE
+			#ifdef INBUILD_MODULE
 			if (ssnt == BIBLIOGRAPHIC_NT)
-				Projects::set_language_of_play(Inbuild::project(),
-					PL::Bibliographic::scan_language(new));
+				Projects::notify_of_bibliographic_sentence(Inbuild::project(), new);
 			#endif
 			return;
 		}
@ -614,9 +610,11 @@ instead of a semicolon. We may lament this, but it is so.)
@<Convert a rule preamble to a ROUTINE node and enter rule mode@> =
 	#ifdef list_node_type
 	if (stop_character == ':') {
-		if ((sfsm_inside_rule_mode) && (Sentences::RuleSubtrees::detect_control_structure(W))) {
+		if ((sfsm_inside_rule_mode) && (ControlStructures::detect(W))) {
 			ParseTree::set_type(new, list_entry_node_type);
+			#ifdef CORE_MODULE
 			ParseTree::annotate_int(new, colon_block_command_ANNOT, TRUE);
+			#endif
 			sfsm_inside_rule_mode = TRUE;
 			return;
 		} else {
--- a/inform7/syntax-test/Chapter
+++ b/inform7/syntax-test/Chapter
@ -26,7 +26,7 @@ void Unit::test_tree(text_stream *arg) {
 	if (sf == NULL) { PRINT("File has failed to open\n"); return; }
 	ParseTree::plant_parse_tree();
 	PRINT("Read %d words\n", Wordings::length(W));
-	Sentences::break(W, FALSE, NULL);
+	Sentences::break(W, FALSE, NULL, -1);

 	text_stream *save_DL = DL;
 	DL = STDOUT;