1
0
Fork 0
mirror of https://github.com/ganelson/inform.git synced 2024-06-26 04:00:43 +03:00

Moved sentence-breaking into inbuild

This commit is contained in:
Graham Nelson 2020-03-03 00:59:42 +00:00
parent 67fc2ae91c
commit ad713cc66a
14 changed files with 217 additions and 138 deletions

View file

@ -29,6 +29,7 @@ linked_list *inbuild_nest_list = NULL;
int main(int argc, char **argv) {
Foundation::start();
WordsModule::start();
SyntaxModule::start();
HTMLModule::start();
ArchModule::start();
InbuildModule::start();
@ -74,10 +75,10 @@ int main(int argc, char **argv) {
}
}
}
WordsModule::end();
ArchModule::end();
InbuildModule::end();
HTMLModule::end();
SyntaxModule::end();
WordsModule::end();
Foundation::end();
return 0;
@ -222,3 +223,68 @@ vocabulary_meaning Main::ignore(vocabulary_entry *ve) {
@
@d PREFORM_LANGUAGE_TYPE void
@d PARSE_TREE_TRAVERSE_TYPE void
@d SENTENCE_NODE Main::sentence_level
=
/* The function named by |SENTENCE_NODE| for this tool: it answers |FALSE|
whatever node type |t| it is given. */
int Main::sentence_level(node_type_t t) {
	return FALSE;
}
@
@d SYNTAX_PROBLEM_HANDLER Main::syntax_problem_handler
=
/* The function named by |SYNTAX_PROBLEM_HANDLER| for this tool. It turns the
syntax error code |err_no| into a message issued through |Errors::|, quoting
the wording |W| where the message has room for it. The parameters |ref| and
|k| are accepted to match the handler's signature but are not consulted here. */
void Main::syntax_problem_handler(int err_no, wording W, void *ref, int k) {
	/* Render the wording once; the cases below which quote it all share this text */
	TEMPORARY_TEXT(text);
	WRITE_TO(text, "%+W", W);
	switch (err_no) {
		/* Punctuation trouble within or at the end of a sentence */
		case UnexpectedSemicolon_SYNERROR:
			Errors::with_text("unexpected semicolon in sentence: %S", text);
			break;
		case ParaEndsInColon_SYNERROR:
			Errors::with_text("paragraph ends with a colon: %S", text);
			break;
		case SentenceEndsInColon_SYNERROR:
			Errors::with_text("paragraph ends with a colon and full stop: %S", text);
			break;
		case SentenceEndsInSemicolon_SYNERROR:
			Errors::with_text("paragraph ends with a semicolon and full stop: %S", text);
			break;
		case SemicolonAfterColon_SYNERROR:
			Errors::with_text("paragraph ends with a colon and semicolon: %S", text);
			break;
		case SemicolonAfterStop_SYNERROR:
			Errors::with_text("paragraph ends with a full stop and semicolon: %S", text);
			break;

		/* Missing or misplaced extension markers */
		case ExtNoBeginsHere_SYNERROR:
			Errors::nowhere("extension has no beginning");
			break;
		case ExtNoEndsHere_SYNERROR:
			Errors::nowhere("extension has no end");
			break;
		case ExtSpuriouslyContinues_SYNERROR:
			Errors::with_text("extension continues after end: %S", text);
			break;

		/* Malformed headings */
		case HeadingOverLine_SYNERROR:
			Errors::with_text("heading contains a line break: %S", text);
			break;
		case HeadingStopsBeforeEndOfLine_SYNERROR:
			Errors::with_text("heading stops before end of line: %S", text);
			break;

		/* "Begins here" and "ends here" sentences out of sequence */
		case ExtMultipleBeginsHere_SYNERROR:
			Errors::nowhere("extension has multiple 'begins here' sentences");
			break;
		case ExtBeginsAfterEndsHere_SYNERROR:
			Errors::nowhere("extension has a 'begins here' after its 'ends here'");
			break;
		case ExtEndsWithoutBegins_SYNERROR:
			Errors::nowhere("extension has an 'ends here' but no 'begins here'");
			break;
		case ExtMultipleEndsHere_SYNERROR:
			Errors::nowhere("extension has multiple 'ends here' sentences");
			break;

		/* An error code with no message of its own must not vanish silently,
		so report it generically rather than ignoring it */
		default:
			Errors::with_text("syntax error: %S", text);
			break;
	}
	DISCARD_TEXT(text);
}

View file

@ -8,6 +8,7 @@ Version Name: Avignon
Import: foundation
Import: inform7/words
Import: inform7/syntax
Import: html
Import: arch
Import: inbuild

View file

@ -311,6 +311,8 @@ void Extensions::read_source_text_for(inform_extension *E) {
E->read_into_file->your_ref = STORE_POINTER_inbuild_copy(E->as_copy);
wording EXW = E->read_into_file->text_read;
if (Wordings::nonempty(EXW)) @<Break the extension's text into body and documentation@>;
Sentences::break(E->body_text, E);
E->body_text_unbroken = FALSE;
}
}

View file

@ -337,6 +337,7 @@ void Projects::construct_graph(inform_project *project) {
=
void Projects::read_source_text_for(inform_project *project) {
int wc = lexer_wordcount;
TEMPORARY_TEXT(early);
Projects::early_source_text(early, project);
if (Str::len(early) > 0) Feeds::feed_stream(early);
@ -355,6 +356,13 @@ void Projects::read_source_text_for(inform_project *project) {
FALSE, TRUE);
}
}
ParseTree::plant_parse_tree();
int l = ParseTree::push_attachment_point(tree_root);
Sentences::break(Wordings::new(wc, lexer_wordcount-1), NULL);
ParseTree::pop_attachment_point(l);
#ifdef CORE_MODULE
StructuralSentences::add_inventions_heading();
#endif
}
int Projects::draws_from_source_file(inform_project *project, source_file *sf) {

View file

@ -93,3 +93,104 @@ void SourceText::lexer_problem_handler(int err, text_stream *desc, wchar_t *word
}
DISCARD_TEXT(erm);
}
@ Sentences in the source text are of five categories: dividing sentences,
which divide up the source into segments; structural sentences, which split
the source into different forms (standard text, tables, equations, I6 matter,
and so on); nonstructural sentences, which make grammatical definitions and
give Inform other more or less direct instructions; rule declarations; and
regular sentences, those which use the standard verbs. Examples:
>> Volume II [dividing]
>> Include Locksmith by Emily Short [structural]
>> Release along with a website [nonstructural]
>> Instead of looking [rule]
>> The cushion is on the wooden chair [regular]
Dividing sentences are always read, whereas the others may be skipped in
sections of source not being included for one reason or another. Dividing
sentences must match the following. Note that the extension end markers are
only read in extensions, so they can never accidentally match in the main
source text.
@e ExtMultipleBeginsHere_SYNERROR
@e ExtBeginsAfterEndsHere_SYNERROR
@e ExtEndsWithoutBegins_SYNERROR
@e ExtMultipleEndsHere_SYNERROR
=
<dividing-sentence> ::=
<if-start-of-paragraph> <heading> | ==> R[2]
<extension-end-marker-sentence> ==> R[1]
<heading> ::=
volume ... | ==> 1
book ... | ==> 2
part ... | ==> 3
chapter ... | ==> 4
section ... ==> 5
<extension-end-marker-sentence> ::=
... begin/begins here | ==> -1; @<Check we can begin an extension here@>;
... end/ends here ==> -2; @<Check we can end an extension here@>;
@<Check we can begin an extension here@> =
switch (sfsm_extension_position) {
case 1: sfsm_extension_position++; break;
case 2: SYNTAX_PROBLEM_HANDLER(ExtMultipleBeginsHere_SYNERROR, W, sfsm_extension, 0); break;
case 3: SYNTAX_PROBLEM_HANDLER(ExtBeginsAfterEndsHere_SYNERROR, W, sfsm_extension, 0); break;
}
@<Check we can end an extension here@> =
switch (sfsm_extension_position) {
case 1: SYNTAX_PROBLEM_HANDLER(ExtEndsWithoutBegins_SYNERROR, W, sfsm_extension, 0); break;
case 2: sfsm_extension_position++; break;
case 3: SYNTAX_PROBLEM_HANDLER(ExtMultipleEndsHere_SYNERROR, W, sfsm_extension, 0); break;
}
@<Detect a dividing sentence@> =
	if (<dividing-sentence>(W)) {
		/* The grammar yields -1 for "begins here", -2 for "ends here", and
		otherwise the heading level, from 1 (volume) down to 5 (section) */
		int outcome = <<r>>;
		if (outcome == -1) {
			/* NOTE(review): presumably a position of 0 means that no extension
			is being read, so that the marker is then disregarded -- confirm */
			if (sfsm_extension_position > 0) begins_or_ends = 1;
		} else if (outcome == -2) {
			if (sfsm_extension_position > 0) begins_or_ends = -1;
		} else {
			heading_level = outcome;
		}
	}
@ Structural sentences are defined as follows. (The asterisk notation isn't
known to most Inform users: it increases output to the debugging log.)
@e BIBLIOGRAPHIC_NT /* For the initial title sentence */
@e ROUTINE_NT /* "Instead of taking something, ..." */
@e INFORM6CODE_NT /* "Include (- ... -)" */
@e TABLE_NT /* "Table 1 - Counties of England" */
@e EQUATION_NT /* "Equation 2 - Newton's Second Law" */
@e TRACE_NT /* A sentence consisting of an asterisk and optional quoted text */
=
<structural-sentence> ::=
<if-start-of-source-text> <quoted-text> | ==> 0; ssnt = BIBLIOGRAPHIC_NT;
<if-start-of-source-text> <quoted-text> ... | ==> 0; ssnt = BIBLIOGRAPHIC_NT;
<language-modifying-sentence> | ==> R[1]
* | ==> 0; ssnt = TRACE_NT;
* <quoted-text-without-subs> | ==> 0; ssnt = TRACE_NT;
<if-start-of-paragraph> table ... | ==> 0; ssnt = TABLE_NT;
<if-start-of-paragraph> equation ... | ==> 0; ssnt = EQUATION_NT;
include <nounphrase-articled> by <nounphrase> | ==> 0; ssnt = INCLUDE_NT; *XP = RP[1]; ((parse_node *) RP[1])->next = RP[2];
include (- ... ==> 0; ssnt = INFORM6CODE_NT;
@ Properly speaking, despite the definition above, language modifying sentences
are nonstructural. So what are they doing here? The answer is that we need to
read them early on, because they affect the way that we parse all other
sentences. Whereas other nonstructural sentences can wait, these can't.
=
<language-modifying-sentence> ::=
include (- ### in the preform grammar | ==> -2; ssnt = INFORM6CODE_NT;
use ... language element/elements ==> -1

View file

@ -109,14 +109,12 @@ most of these worker functions are in the |core| module, some are not.
@<Perform lexical analysis@> =
Task::advance_stage_to(LEXICAL_CSEQ, I"Lexical analysis", 0);
BENCH(Task::read_source_text)
BENCH(Sentences::RuleSubtrees::create_standard_csps)
BENCH(Task::read_source_text)
@<Perform semantic analysis@> =
Task::advance_stage_to(SEMANTIC_IA_CSEQ, I"Semantic analysis Ia", 1);
BENCH(Task::activate_language_elements)
BENCH(ParseTreeUsage::plant_parse_tree)
BENCH(StructuralSentences::break_source)
BENCH(Extensions::Inclusion::traverse)
BENCH(Sentences::Headings::satisfy_dependencies)

View file

@ -14,7 +14,6 @@ can simply discard the search results.
=
int bundle_scan_made = FALSE;
int language_scan_top = -1;
void NaturalLanguages::scan(void) {
if (bundle_scan_made == FALSE) {
@ -22,7 +21,6 @@ void NaturalLanguages::scan(void) {
inbuild_requirement *req = Requirements::anything_of_genre(language_genre);
linked_list *L = NEW_LINKED_LIST(inbuild_search_result);
Nests::search_for(req, Inbuild::nest_list(), L);
language_scan_top = lexer_wordcount - 1;
}
}

View file

@ -213,7 +213,6 @@ void Semantics::read_preform(void) {
NaturalLanguages::scan();
wording W = NaturalLanguages::load_preform(Projects::get_language_of_syntax(Task::project()));
int nonterminals_declared = Preform::parse_preform(W, FALSE);
language_definition_top = lexer_wordcount - 1;
LOG("%d declarations read (%d words)\n", nonterminals_declared, Wordings::length(W));
}

View file

@ -134,13 +134,6 @@ void ParseTreeUsage::copy_annotations(parse_node_annotation *to, parse_node_anno
@
@e BIBLIOGRAPHIC_NT /* For the initial title sentence */
@e ROUTINE_NT /* "Instead of taking something, ..." */
@e INFORM6CODE_NT /* "Include (- ... -) */
@e TABLE_NT /* "Table 1 - Counties of England" */
@e EQUATION_NT /* "Equation 2 - Newton's Second Law" */
@e TRACE_NT /* A sentence consisting of an asterisk and optional quoted text */
@e ALLOWED_NT /* "An animal is allowed to have a description" */
@e EVERY_NT /* "every container" */
@e COMMON_NOUN_NT /* "a container" */

View file

@ -64,22 +64,11 @@ void StructuralSentences::new_language(wording W) {
"in favour of a new system with Inform kits.");
}
@h Sentence breaking.
The |Sentences::break| routine is used for long stretches of text,
normally entire files. The following provides a way for the |.i6t|
interpreter to apply it to the whole text as lexed, which provides the
original basis for parsing. (This won't be the entire source text,
though: extensions, including the Standard Rules, have yet to be read.)
@ This is for invented sentences, such as those creating the understood
variables.
=
void StructuralSentences::break_source(void) {
int l = ParseTree::push_attachment_point(tree_root);
int n = 0;
if (language_definition_top >= n) n = language_definition_top+1;
if (doc_references_top >= n) n = doc_references_top+1;
if (language_scan_top >= n) n = language_scan_top+1;
Sentences::break(Wordings::new(n, lexer_wordcount-1), NULL);
ParseTree::pop_attachment_point(l);
void StructuralSentences::add_inventions_heading(void) {
parse_node *implicit_heading = ParseTree::new(HEADING_NT);
ParseTree::set_text(implicit_heading, Feeds::feed_text_expanding_strings(L"Invented sentences"));
ParseTree::annotate_int(implicit_heading, sentence_unparsed_ANNOT, FALSE);
@ -88,98 +77,6 @@ void StructuralSentences::break_source(void) {
Sentences::Headings::declare(implicit_heading);
}
@ Sentences in the source text are of five categories: dividing sentences,
which divide up the source into segments; structural sentences, which split
the source into different forms (standard text, tables, equations, I6 matter,
and so on); nonstructural sentences, which make grammatical definitions and
give Inform other more or less direct instructions; rule declarations; and
regular sentences, those which use the standard verbs. Examples:
>> Volume II [dividing]
>> Include Locksmith by Emily Short [structural]
>> Release along with a website [nonstructural]
>> Instead of looking [rule]
>> The cushion is on the wooden chair [regular]
Dividing sentences are always read, whereas the others may be skipped in
sections of source not being included for one reason or another. Dividing
sentences must match the following. Note that the extension end markers are
only read in extensions, so they can never accidentally match in the main
source text.
=
<dividing-sentence> ::=
<if-start-of-paragraph> <heading> | ==> R[2]
<extension-end-marker-sentence> ==> R[1]
<heading> ::=
volume ... | ==> 1
book ... | ==> 2
part ... | ==> 3
chapter ... | ==> 4
section ... ==> 5
<extension-end-marker-sentence> ::=
... begin/begins here | ==> -1; @<Check we can begin an extension here@>;
... end/ends here ==> -2; @<Check we can end an extension here@>;
@<Check we can begin an extension here@> =
switch (sfsm_extension_position) {
case 1: sfsm_extension_position++; break;
case 2: Problems::Issue::extension_problem(_p_(PM_ExtMultipleBeginsHere),
sfsm_extension, "has more than one 'begins here' sentence"); break;
case 3: Problems::Issue::extension_problem(_p_(PM_ExtBeginsAfterEndsHere),
sfsm_extension, "has a further 'begins here' after an 'ends here'"); break;
}
@<Check we can end an extension here@> =
switch (sfsm_extension_position) {
case 1: Problems::Issue::extension_problem(_p_(BelievedImpossible),
sfsm_extension, "has an 'ends here' with nothing having begun"); break;
case 2: sfsm_extension_position++; break;
case 3: Problems::Issue::extension_problem(_p_(PM_ExtMultipleEndsHere),
sfsm_extension, "has more than one 'ends here' sentence"); break;
}
@<Detect a dividing sentence@> =
if (<dividing-sentence>(W)) {
switch (<<r>>) {
case -1: if (sfsm_extension_position > 0) begins_or_ends = 1;
break;
case -2:
if (sfsm_extension_position > 0) begins_or_ends = -1;
break;
default:
heading_level = <<r>>;
break;
}
}
@ Structural sentences are defined as follows. (The asterisk notation isn't
known to most Inform users: it increases output to the debugging log.)
=
<structural-sentence> ::=
<if-start-of-source-text> <quoted-text> | ==> 0; ssnt = BIBLIOGRAPHIC_NT;
<if-start-of-source-text> <quoted-text> ... | ==> 0; ssnt = BIBLIOGRAPHIC_NT;
<language-modifying-sentence> | ==> R[1]
* | ==> 0; ssnt = TRACE_NT;
* <quoted-text-without-subs> | ==> 0; ssnt = TRACE_NT;
<if-start-of-paragraph> table ... | ==> 0; ssnt = TABLE_NT;
<if-start-of-paragraph> equation ... | ==> 0; ssnt = EQUATION_NT;
include <nounphrase-articled> by <nounphrase> | ==> 0; ssnt = INCLUDE_NT; *XP = RP[1]; ((parse_node *) RP[1])->next = RP[2];
include (- ... ==> 0; ssnt = INFORM6CODE_NT;
@ Properly speaking, despite the definition above, language modifying sentences
are nonstructural. So what are they doing here? The answer is that we need to
read them early on, because they affect the way that they parse all other
sentences. Whereas other nonstructural sentences can wait, these can't.
=
<language-modifying-sentence> ::=
include (- ### in the preform grammar | ==> -2; ssnt = INFORM6CODE_NT;
use ... language element/elements ==> -1
@
@d SYNTAX_PROBLEM_HANDLER StructuralSentences::syntax_problem_handler
@ -288,6 +185,31 @@ void StructuralSentences::syntax_problem_handler(int err_no, wording W, void *re
"arises when a decimal point is misread as a full stop.)");
Problems::issue_problem_end();
break;
case ExtMultipleBeginsHere_SYNERROR: {
inform_extension *E = (inform_extension *) ref;
Problems::Issue::extension_problem(_p_(PM_ExtMultipleBeginsHere),
E, "has more than one 'begins here' sentence");
break;
}
case ExtBeginsAfterEndsHere_SYNERROR: {
inform_extension *E = (inform_extension *) ref;
Problems::Issue::extension_problem(_p_(PM_ExtBeginsAfterEndsHere),
E, "has a further 'begins here' after an 'ends here'");
break;
}
case ExtEndsWithoutBegins_SYNERROR: {
inform_extension *E = (inform_extension *) ref;
Problems::Issue::extension_problem(_p_(BelievedImpossible),
E, "has an 'ends here' with nothing having begun"); break;
break;
}
case ExtMultipleEndsHere_SYNERROR: {
inform_extension *E = (inform_extension *) ref;
Problems::Issue::extension_problem(_p_(PM_ExtMultipleEndsHere),
E, "has more than one 'ends here' sentence"); break;
break;
}
default: internal_error("unimplemented problem message");
}
}

View file

@ -140,15 +140,16 @@ parse tree.
DISCARD_TEXT(exft);
DISCARD_TEXT(exfa);
parse_node *at = current_sentence;
inform_extension *E = Extensions::Inclusion::load(req);
if (E) {
Extensions::set_inclusion_sentence(E, current_sentence);
Extensions::set_inclusion_sentence(E, at);
Extensions::set_VM_text(E, RW);
}
if ((E) && (E->body_text_unbroken)) {
Sentences::break(E->body_text, E);
E->body_text_unbroken = FALSE;
}
// if ((E) && (E->body_text_unbroken)) {
// Sentences::break(E->body_text, E);
// E->body_text_unbroken = FALSE;
// }
@h Extension loading.
Extensions are loaded here.

View file

@ -48,7 +48,6 @@ void Index::DocReferences::read_xrefs(void) {
xrefs_read = TRUE;
TextFiles::read(Inbuild::file_from_installation(DOCUMENTATION_XREFS_IRES), TRUE,
NULL, FALSE, Index::DocReferences::read_xrefs_helper, NULL, NULL);
doc_references_top = lexer_wordcount - 1;
}
}

View file

@ -78,18 +78,8 @@ with result 2.
yellow | polkadot | green | white
@h Implementation.
We read the Preform file for English early in Inform's run, and since it
goes through the standard lexer, it makes words. The following holds the
word number of the last of these words. (The same is also true for documentation
cross-references, which are not really anything to do with Preform.)
=
int language_definition_top = -1;
int doc_references_top = -1;
@ Now for nonterminals. We must first clarify how word ranges, once matched in
the parser, will be stored. Within each production, word ranges are numbered
upwards from 1. Thus:
We must first clarify how word ranges, once matched in the parser, will be
stored. Within each production, word ranges are numbered upwards from 1. Thus:
|man with ... on his ...|

View file

@ -105,6 +105,7 @@ INBUILDX = inbuild/Tangled/inbuild
{tool} INBUILDTOOL inbuild inbuild
{dep} INBUILDTOOL on FOUNDATION
{dep} INBUILDTOOL on WORDS
{dep} INBUILDTOOL on SYNTAX
{dep} INBUILDTOOL on HTML
{dep} INBUILDTOOL on ARCH
{dep} INBUILDTOOL on INBUILD