From 4c86f53d74f9dddeac4447512f83951c383ec9d4 Mon Sep 17 00:00:00 2001
From: Graham Nelson
Date: Sun, 17 May 2020 00:20:21 +0100
Subject: [PATCH] Improved Preform source
---
README.md | 2 +-
build.txt | 4 +-
docs/core-module/3-nl.html | 7 +-
docs/core-module/4-its.html | 6 +-
docs/inbuild/1-mn.html | 3 +-
docs/inflections-test/1-pc.html | 6 +-
docs/linguistics-test/1-pc.html | 3 +-
docs/problems-test/1-pc.html | 3 +-
docs/supervisor-module/5-ls.html | 2 +-
docs/supervisor-module/5-ps2.html | 1 -
docs/syntax-module/3-snt.html | 6 +-
docs/syntax-test/1-pc.html | 3 +-
docs/words-module/1-wm.html | 3 +-
docs/words-module/2-nw.html | 4 +-
docs/words-module/2-vcb.html | 8 +-
docs/words-module/3-fds.html | 4 +-
docs/words-module/3-lxr.html | 4 +-
docs/words-module/3-wrd.html | 14 +-
docs/words-module/4-ap.html | 58 +-
docs/words-module/4-bn.html | 49 +-
docs/words-module/4-lp.html | 876 +++++++-----------
docs/words-module/4-nnt.html | 360 +++++++
docs/words-module/4-prf.html | 8 +-
docs/words-module/4-to.html | 6 +-
docs/words-module/P-wtmd.html | 4 +-
docs/words-module/index.html | 5 +
docs/words-test/1-ut.html | 3 +-
inbuild/Chapter 1/Main.w | 3 +-
.../Chapter 5/Language Services.w | 2 +-
.../Chapter 5/Project Services.w | 1 -
.../core-module/Chapter 3/Natural Languages.w | 7 +-
.../Chapter 4/Introduction to Semantics.w | 6 +-
.../Chapter 1/Program Control.w | 6 +-
.../Chapter 1/Program Control.w | 3 +-
.../problems-test/Chapter 1/Program Control.w | 3 +-
shared/syntax-module/Chapter 3/Sentences.w | 6 +-
.../syntax-test/Chapter 1/Program Control.w | 3 +-
shared/words-module/Chapter 1/Words Module.w | 3 +-
shared/words-module/Chapter 4/About Preform.w | 44 +
.../Chapter 4/Basic Nonterminals.w | 32 +-
.../words-module/Chapter 4/Loading Preform.w | 711 ++++++--------
shared/words-module/Chapter 4/Nonterminals.w | 248 +++++
shared/words-module/Chapter 4/Preform.w | 2 +-
shared/words-module/Contents.w | 1 +
.../Preliminaries/What This Module Does.w | 4 +-
shared/words-test/Chapter 1/Unit Tests.w | 3 +-
46 files changed, 1476 insertions(+), 1064 deletions(-)
create mode 100644 docs/words-module/4-nnt.html
create mode 100644 shared/words-module/Chapter 4/Nonterminals.w
diff --git a/README.md b/README.md
index b5c45dcf2..fdc7740a6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# Inform 7
-v10.1.0-alpha.1+6Q46 'Krypton' (12 May 2020)
+v10.1.0-alpha.1+6Q47 'Krypton' (17 May 2020)
## About Inform 7
diff --git a/build.txt b/build.txt
index b8d3b6041..afa64fcb4 100644
--- a/build.txt
+++ b/build.txt
@@ -1,3 +1,3 @@
Prerelease: alpha.1
-Build Date: 12 May 2020
-Build Number: 6Q46
+Build Date: 17 May 2020
+Build Number: 6Q47
diff --git a/docs/core-module/3-nl.html b/docs/core-module/3-nl.html
index 81919ed27..06e0208ef 100644
--- a/docs/core-module/3-nl.html
+++ b/docs/core-module/3-nl.html
@@ -155,10 +155,11 @@ this because we need access to it very quickly when parsing text substitutions.)
Loading Preform -
diff --git a/docs/words-test/1-ut.html b/docs/words-test/1-ut.html
index e2be98505..c65fe042c 100644
--- a/docs/words-test/1-ut.html
+++ b/docs/words-test/1-ut.html
@@ -107,8 +107,7 @@ function togglePopup(material_id) {
P = Pathnames::down(P, I"words-test");
P = Pathnames::down(P, I"Tangled");
filename *S = Filenames::in(P, I"Syntax.preform");
- wording W = LoadPreform::load(S);
- LoadPreform::parse(W, FALSE);
+ LoadPreform::load(S, NULL);
filename *F = Filenames::from_text(arg);
source_file *sf = TextFromFiles::feed_into_lexer(F, NULL_GENERAL_POINTER);
diff --git a/inbuild/Chapter 1/Main.w b/inbuild/Chapter 1/Main.w
index 4f392d7f3..22ddbd957 100644
--- a/inbuild/Chapter 1/Main.w
+++ b/inbuild/Chapter 1/Main.w
@@ -170,8 +170,7 @@ for use.
void Main::load_preform(inform_language *L) {
pathname *P = Pathnames::down(path_to_inbuild, I"Tangled");
filename *S = Filenames::in(P, I"Syntax.preform");
- wording W = LoadPreform::load(S);
- LoadPreform::parse(W, FALSE);
+ LoadPreform::load(S, NULL);
}
@h Target list.
diff --git a/inbuild/supervisor-module/Chapter 5/Language Services.w b/inbuild/supervisor-module/Chapter 5/Language Services.w
index 3cfc4eddf..b60345775 100644
--- a/inbuild/supervisor-module/Chapter 5/Language Services.w
+++ b/inbuild/supervisor-module/Chapter 5/Language Services.w
@@ -146,7 +146,7 @@ void Languages::read_Preform_definition(inform_language *L, linked_list *S) {
@ This function is called only from Preform...
-@d PREFORM_LANGUAGE_FROM_NAME Languages::Preform_find
+@d PREFORM_LANGUAGE_FROM_NAME_WORDS_CALLBACK Languages::Preform_find
=
inform_language *Languages::Preform_find(text_stream *name) {
diff --git a/inbuild/supervisor-module/Chapter 5/Project Services.w b/inbuild/supervisor-module/Chapter 5/Project Services.w
index f6ff2d815..c1e968f04 100644
--- a/inbuild/supervisor-module/Chapter 5/Project Services.w
+++ b/inbuild/supervisor-module/Chapter 5/Project Services.w
@@ -697,7 +697,6 @@ for the extensions they refer to, in a post-processing phase.
=
void Projects::read_source_text_for(inform_project *proj) {
Languages::read_Preform_definition(proj->language_of_syntax, proj->search_list);
- LoadPreform::set_language_of_syntax(proj->language_of_syntax);
Sentences::set_start_of_source(sfsm, -1);
parse_node *inclusions_heading, *implicit_heading;
diff --git a/inform7/core-module/Chapter 3/Natural Languages.w b/inform7/core-module/Chapter 3/Natural Languages.w
index 0606ce19b..770af8fd4 100644
--- a/inform7/core-module/Chapter 3/Natural Languages.w
+++ b/inform7/core-module/Chapter 3/Natural Languages.w
@@ -87,10 +87,11 @@ int NaturalLanguages::adaptive_person(inform_language *L) {
At present we do this only for English, but some day...
=
-wording NaturalLanguages::load_preform(inform_language *L) {
+int NaturalLanguages::load_preform(inform_language *L) {
if (L == NULL) internal_error("can't load preform from null language");
- filename *preform_file = Filenames::in(Languages::path_to_bundle(L), I"Syntax.preform");
- return LoadPreform::load_for_language(preform_file, L);
+ filename *preform_file =
+ Filenames::in(Languages::path_to_bundle(L), I"Syntax.preform");
+ return LoadPreform::load(preform_file, L);
}
@ Preform errors are handled here:
diff --git a/inform7/core-module/Chapter 4/Introduction to Semantics.w b/inform7/core-module/Chapter 4/Introduction to Semantics.w
index 57306192f..71b38b27b 100644
--- a/inform7/core-module/Chapter 4/Introduction to Semantics.w
+++ b/inform7/core-module/Chapter 4/Introduction to Semantics.w
@@ -208,10 +208,8 @@ whole thing into a |specification| for the rest of Inform to use.
=
void Semantics::read_preform(inform_language *L) {
@;
- wording W = NaturalLanguages::load_preform(L);
- int nonterminals_declared = LoadPreform::parse(W, FALSE);
-
- LOG("%d declarations read (%d words)\n", nonterminals_declared, Wordings::length(W));
+ int nonterminals_declared = NaturalLanguages::load_preform(L);
+ LOG("%d declarations read\n", nonterminals_declared);
}
@ =
diff --git a/inform7/inflections-test/Chapter 1/Program Control.w b/inform7/inflections-test/Chapter 1/Program Control.w
index 2b8883d13..3bdf6056b 100644
--- a/inform7/inflections-test/Chapter 1/Program Control.w
+++ b/inform7/inflections-test/Chapter 1/Program Control.w
@@ -58,8 +58,7 @@ void Main::load(text_stream *leaf) {
P = Pathnames::down(P, I"inflections-test");
P = Pathnames::down(P, I"Tangled");
filename *S = Filenames::in(P, leaf);
- wording W = LoadPreform::load(S);
- LoadPreform::parse(W, FALSE);
+ LoadPreform::load(S, NULL);
}
void Main::load_other(text_stream *leaf) {
@@ -67,8 +66,7 @@ void Main::load_other(text_stream *leaf) {
P = Pathnames::down(P, I"inflections-test");
P = Pathnames::down(P, I"Preform");
filename *S = Filenames::in(P, leaf);
- wording W = LoadPreform::load(S);
- LoadPreform::parse(W, FALSE);
+ LoadPreform::load(S, NULL);
}
void Main::ignore(int id, text_stream *arg, void *state) {
diff --git a/inform7/linguistics-test/Chapter 1/Program Control.w b/inform7/linguistics-test/Chapter 1/Program Control.w
index 6ba7288c2..1aa96cb51 100644
--- a/inform7/linguistics-test/Chapter 1/Program Control.w
+++ b/inform7/linguistics-test/Chapter 1/Program Control.w
@@ -45,8 +45,7 @@ void Main::load(text_stream *leaf) {
P = Pathnames::down(P, I"linguistics-test");
P = Pathnames::down(P, I"Tangled");
filename *S = Filenames::in(P, leaf);
- wording W = LoadPreform::load(S);
- LoadPreform::parse(W, FALSE);
+ LoadPreform::load(S, NULL);
}
void Main::ignore(int id, text_stream *arg, void *state) {
diff --git a/inform7/problems-test/Chapter 1/Program Control.w b/inform7/problems-test/Chapter 1/Program Control.w
index 3d004121e..121877dd9 100644
--- a/inform7/problems-test/Chapter 1/Program Control.w
+++ b/inform7/problems-test/Chapter 1/Program Control.w
@@ -40,8 +40,7 @@ void Main::load(text_stream *leaf) {
P = Pathnames::down(P, I"problems-test");
P = Pathnames::down(P, I"Tangled");
filename *S = Filenames::in(P, leaf);
- wording W = LoadPreform::load(S);
- LoadPreform::parse(W, FALSE);
+ LoadPreform::load(S, NULL);
}
void Main::ignore(int id, text_stream *arg, void *state) {
diff --git a/shared/syntax-module/Chapter 3/Sentences.w b/shared/syntax-module/Chapter 3/Sentences.w
index dde5f0a0d..455995f54 100644
--- a/shared/syntax-module/Chapter 3/Sentences.w
+++ b/shared/syntax-module/Chapter 3/Sentences.w
@@ -677,7 +677,11 @@ it would be too late.
@ =
current_sentence = new;
- LoadPreform::parse(GET_RW(, 1), TRUE);
+ wording W = GET_RW(, 1);
+ TEMPORARY_TEXT(wd);
+ WRITE_TO(wd, "%+W", Wordings::one_word(Wordings::first_wn(W)));
+ LoadPreform::parse_text(wd);
+ DISCARD_TEXT(wd);
Annotations::write_int(new, sentence_unparsed_ANNOT, FALSE);
@ Some tools using this module will want to push simple error messages out to
diff --git a/shared/syntax-test/Chapter 1/Program Control.w b/shared/syntax-test/Chapter 1/Program Control.w
index 2c9609d95..82460e1b7 100644
--- a/shared/syntax-test/Chapter 1/Program Control.w
+++ b/shared/syntax-test/Chapter 1/Program Control.w
@@ -38,8 +38,7 @@ void Main::load(text_stream *leaf) {
P = Pathnames::down(P, I"syntax-test");
P = Pathnames::down(P, I"Tangled");
filename *S = Filenames::in(P, leaf);
- wording W = LoadPreform::load(S);
- LoadPreform::parse(W, FALSE);
+ LoadPreform::load(S, NULL);
}
void Main::ignore(int id, text_stream *arg, void *state) {
diff --git a/shared/words-module/Chapter 1/Words Module.w b/shared/words-module/Chapter 1/Words Module.w
index 45905f09d..f41a56e5e 100644
--- a/shared/words-module/Chapter 1/Words Module.w
+++ b/shared/words-module/Chapter 1/Words Module.w
@@ -51,7 +51,8 @@ void WordsModule::start(void) {
Lexer::start();
Vocabulary::create_punctuation();
- LoadPreform::begin();
+ LoadPreform::create_punctuation();
+ Nonterminals::register();
}
void WordsModule::end(void) {
diff --git a/shared/words-module/Chapter 4/About Preform.w b/shared/words-module/Chapter 4/About Preform.w
index 8910b751a..5afbc6c54 100644
--- a/shared/words-module/Chapter 4/About Preform.w
+++ b/shared/words-module/Chapter 4/About Preform.w
@@ -106,3 +106,47 @@ with result 2.
::=
yellow | polkadot | green | white
=
+
+@ So far, the only ingredients of Preform syntax have been nonterminals and
+fixed words, but Preform also has "wildcards". For example,
+= (text as Preform)
+ ::=
+ man with ... on his ...
+=
+would match, for example, "man with number 17 on his back", or "man with a
+chip on his shoulder". |...| matches any non-empty wording, and the text
+actually matched is recorded for any successful match. Wordings like this
+are numbered upwards from 1 to a maximum of 4, and are usually retrieved by
+whatever part of Inform requested the parse, using the |GET_RW| macro. For
+example:
+= (text)
+TEXT GET_RW(, 1) GET_RW(, 2)
+man with number 17 on his back number 17 back
+man with a chip on his shoulder a chip shoulder
+=
+A few internal nonterminals also generate word ranges, using |PUT_RW| to do so,
+and word ranges can also be inherited up from one nonterminal to another with
+|INHERIT_RANGES|: see //Loading Preform// for definitions of these macros.
+
+There are in fact several different wildcards:
+(a) |...| matches any non-empty text, as shown above.
+(b) |***| matches any text, including possibly the empty text.
+(c) |......| matches any non-empty text in which brackets are used in a
+balanced way -- thus it would match "alpha beta gamma" or "alpha (the
+Greek letter)", but not "alpha (the" or "Greek letter)".
+(d) |###| matches any single word, counting words as the lexer does.
+
+It is also possible to use braces to widen ranges. For example,
+= (text as Preform)
+ ::=
+ man with {... on his ...}
+=
+groups together anything matching |... on his ...| into a single range. There
+need not even be a wildcard inside the braces:
+= (text as Preform)
+ ::=
+ {man} with {... on his ...}
+=
+works fine, and makes two ranges, the first of which is always just "man".
+
+
diff --git a/shared/words-module/Chapter 4/Basic Nonterminals.w b/shared/words-module/Chapter 4/Basic Nonterminals.w
index 76eba7888..11be5a9bf 100644
--- a/shared/words-module/Chapter 4/Basic Nonterminals.w
+++ b/shared/words-module/Chapter 4/Basic Nonterminals.w
@@ -2,6 +2,22 @@
A handful of bare minimum Preform syntax.
+@h Nonterminal names.
+This is a typical internal nonterminal being defined, though it's a bit more
+meta than most -- it's a nonterminal which matches against the name of any
+nonterminal. (This is used only to parse inclusion requests for the debugging
+log.)
+
+Note that we use |internal 1| to signal that a correct match must have
+exactly one word.
+
+=
+ internal 1 {
+ nonterminal *nt = Nonterminals::detect(Lexer::word(Wordings::first_wn(W)));
+ if (nt) { *XP = nt; return TRUE; }
+ return FALSE;
+}
+
@h Text positions.
A useful nonterminal which matches no text, but detects the position:
@@ -22,8 +38,8 @@ A useful nonterminal which matches no text, but detects the position:
}
@h Balancing.
-The following matches any text in which braces and brackets are correctly
-paired.
+The following regular (not internal!) nonterminal matches any text in which
+braces and brackets are correctly paired.
=
::=
@@ -60,21 +76,24 @@ for interpolations called "text substitutions".
=
internal 1 {
- if ((Wordings::nonempty(W)) && (Vocabulary::test_flags(Wordings::first_wn(W), TEXT_MC+TEXTWITHSUBS_MC))) {
+ if ((Wordings::nonempty(W)) &&
+ (Vocabulary::test_flags(Wordings::first_wn(W), TEXT_MC+TEXTWITHSUBS_MC))) {
*X = Wordings::first_wn(W); return TRUE;
}
return FALSE;
}
internal 1 {
- if ((Wordings::nonempty(W)) && (Vocabulary::test_flags(Wordings::first_wn(W), TEXTWITHSUBS_MC))) {
+ if ((Wordings::nonempty(W)) &&
+ (Vocabulary::test_flags(Wordings::first_wn(W), TEXTWITHSUBS_MC))) {
*X = Wordings::first_wn(W); return TRUE;
}
return FALSE;
}
internal 1 {
- if ((Wordings::nonempty(W)) && (Vocabulary::test_flags(Wordings::first_wn(W), TEXT_MC))) {
+ if ((Wordings::nonempty(W)) &&
+ (Vocabulary::test_flags(Wordings::first_wn(W), TEXT_MC))) {
*X = Wordings::first_wn(W); return TRUE;
}
return FALSE;
@@ -85,7 +104,8 @@ of text |""| is to provide a nonterminal matching it:
=
internal 1 {
- if ((Wordings::nonempty(W)) && (Word::compare_by_strcmp(Wordings::first_wn(W), L"\"\""))) {
+ if ((Wordings::nonempty(W)) &&
+ (Word::compare_by_strcmp(Wordings::first_wn(W), L"\"\""))) {
*X = Wordings::first_wn(W); return TRUE;
}
return FALSE;
diff --git a/shared/words-module/Chapter 4/Loading Preform.w b/shared/words-module/Chapter 4/Loading Preform.w
index 3417b703b..4c3af3d31 100644
--- a/shared/words-module/Chapter 4/Loading Preform.w
+++ b/shared/words-module/Chapter 4/Loading Preform.w
@@ -3,28 +3,28 @@
To read in structural definitions of natural language written in the
meta-language Preform.
-@h Introduction.
+@h Reading Preform syntax from a file or text.
The parser reads source text against a specific language only, if
-|language_of_source_text| is set; or, if it isn't, from any language.
+|primary_Preform_language| is set; or, if it isn't, from any language.
@default NATURAL_LANGUAGE_WORDS_TYPE void
=
-NATURAL_LANGUAGE_WORDS_TYPE *language_of_source_text = NULL;
+NATURAL_LANGUAGE_WORDS_TYPE *primary_Preform_language = NULL;
-void LoadPreform::set_language_of_syntax(NATURAL_LANGUAGE_WORDS_TYPE *L) {
- language_of_source_text = L;
+int LoadPreform::load(filename *F, NATURAL_LANGUAGE_WORDS_TYPE *L) {
+ primary_Preform_language = L;
+ return LoadPreform::parse(LoadPreform::feed_from_Preform_file(F), L);
}
-@h Reading Preform syntax from a file.
+@ We simply feed the lines one at a time. Preform is parsed with the same
+lexer as is used for Inform itself, but using the following set of characters
+as word-breaking punctuation marks:
+
+@d PREFORM_PUNCTUATION_MARKS L"{}[]_^?&\\"
=
-wording LoadPreform::load_for_language(filename *F, NATURAL_LANGUAGE_WORDS_TYPE *L) {
- LoadPreform::set_language_to_parse(L);
- return LoadPreform::load(F);
-}
-
-wording LoadPreform::load(filename *F) {
+wording LoadPreform::feed_from_Preform_file(filename *F) {
feed_t id = Feeds::begin();
if (TextFiles::read(F, FALSE,
NULL, FALSE, LoadPreform::load_helper, NULL, NULL) == FALSE)
@@ -32,101 +32,186 @@ wording LoadPreform::load(filename *F) {
return Feeds::end(id);
}
-@ We simply feed the lines one at a time. Preform is parsed with the regular
-lexer, using the following set of characters as word-breaking punctuation marks:
-
-@d PREFORM_PUNCTUATION_MARKS L"{}[]_^?&\\"
-
-=
-void LoadPreform::load_helper(text_stream *item_name,
- text_file_position *tfp, void *vnl) {
+void LoadPreform::load_helper(text_stream *item_name, text_file_position *tfp,
+ void *unused_state) {
WRITE_TO(item_name, "\n");
Feeds::feed_text_punctuated(item_name, PREFORM_PUNCTUATION_MARKS);
}
-@h Implementation.
-We must first clarify how word ranges, once matched in the parser, will be
-stored. Within each production, word ranges are numbered upwards from 1. Thus:
-= (text as InC)
- man with ... on his ...
+@ It is also possible to load additional Preform declarations from source
+text in Inform, and when that happens, the following is called:
+
=
-would, if it matched successfully, generate two word ranges, numbered 1 and 2.
-These are stored in memory belonging to the nonterminal; they are usually, but
-not always, then retrieved by whatever part of Inform requested the parse,
-using the |GET_RW| macro rather than a function call for speed. It's rare,
-but a few internal nonterminals also generate word ranges: they use the
-corresponding |PUT_RW| macro to do so. Lastly, we can pass word ranges up
-from one nonterminal to another, with |INHERIT_RANGES|.
-
-This form of storage incurs very little time or space overhead, and is possible
-only because the parser never backtracks. But it also follows that word ranges
-are overwritten if a nonterminal calls itself directly or indirectly: that is,
-the inner one's results are wiped out by the outer one. But this is no problem,
-since we never extract word-ranges from grammar which is recursive.
-
-Word range 0 is reserved in case we ever need it for the entire text matched
-by the nonterminal, but at present we don't need that.
-
-@d MAX_RANGES_PER_PRODUCTION 5 /* in fact, one less than this, since range 0 is reserved */
-@d GET_RW(nt, N) (nt->range_result[N])
-@d PUT_RW(nt, N, W) { nt->range_result[N] = W; }
-@d INHERIT_RANGES(from, to) {
- for (int i=1; irange_result[i] = from->range_result[i];
-}
-@d CLEAR_RW(from) {
- for (int i=0; irange_result[i] = EMPTY_WORDING;
+int LoadPreform::parse_text(text_stream *wd) {
+ wording W = Feeds::feed_text_punctuated(wd, PREFORM_PUNCTUATION_MARKS);
+ return LoadPreform::parse(W, primary_Preform_language);
}
-@ So here's the nonterminal structure. There are a few further complications
-for speed reasons:
+@ Either way, then, all that remains is to write //LoadPreform::parse//. But
+before we can get to that, we have to create the...
-(a) The minimum and maximum number of words which could ever be a match are
-precalculated. For example, if Preform can tell that N will only a run of
-between 3 and 7 words inclusive, then it can quickly reject any run of words
-outside that range. |INFINITE_WORD_COUNT| is taken as the maximum if N
-could in principle match text of any length. (However: note that a maximum of
-0 means that the maximum and minimum word counts are disregarded.)
+@h Reserved words in Preform.
+The ideal tool with which to parse Preform definitions would be Preform, but
+then how would we define the grammar required? So we will have to do this
+by hand, and in particular, we have to define Preform's syntactic punctuation
+marks explicitly. These are, in effect, the reserved words of the Preform
+notational language. (Note the absence of the |==>| marker: that's stripped
+out by //inweb// and never reaches the |Syntax.preform| file.)
-(b) A few internal nonterminals are "voracious". These are given the entire
-word range for their productions to eat, and encouraged to eat as much as
-they like, returning a word number to show how far they got. While this
-effect could be duplicated with suitable grammar and non-voracious nonterminals,
-it would be quite a bit slower, since it would have to test every possible
-word range.
+The bare letters K and L are snuck in here for convenience. They aren't
+actually used by anything in //words//, but are used for kind variables in
+//kinds//.
+
+=
+vocabulary_entry *AMPERSAND_V;
+vocabulary_entry *BACKSLASH_V;
+vocabulary_entry *CARET_V;
+vocabulary_entry *COLONCOLONEQUALS_V;
+vocabulary_entry *QUESTIONMARK_V;
+vocabulary_entry *QUOTEQUOTE_V;
+vocabulary_entry *SIXDOTS_V;
+vocabulary_entry *THREEASTERISKS_V;
+vocabulary_entry *THREEDOTS_V;
+vocabulary_entry *THREEHASHES_V;
+vocabulary_entry *UNDERSCORE_V;
+vocabulary_entry *language_V;
+vocabulary_entry *internal_V;
+
+vocabulary_entry *CAPITAL_K_V;
+vocabulary_entry *CAPITAL_L_V;
+
+@ =
+void LoadPreform::create_punctuation(void) {
+ AMPERSAND_V = Vocabulary::entry_for_text(L"&");
+ BACKSLASH_V = Vocabulary::entry_for_text(L"\\");
+ CARET_V = Vocabulary::entry_for_text(L"^");
+ COLONCOLONEQUALS_V = Vocabulary::entry_for_text(L":" ":=");
+ QUESTIONMARK_V = Vocabulary::entry_for_text(L"?");
+ QUOTEQUOTE_V = Vocabulary::entry_for_text(L"\"\"");
+ SIXDOTS_V = Vocabulary::entry_for_text(L"......");
+ THREEASTERISKS_V = Vocabulary::entry_for_text(L"***");
+ THREEDOTS_V = Vocabulary::entry_for_text(L"...");
+ THREEHASHES_V = Vocabulary::entry_for_text(L"###");
+ UNDERSCORE_V = Vocabulary::entry_for_text(L"_");
+ language_V = Vocabulary::entry_for_text(L"language");
+ internal_V = Vocabulary::entry_for_text(L"internal");
+
+ CAPITAL_K_V = Vocabulary::entry_for_text(L"k");
+ CAPITAL_L_V = Vocabulary::entry_for_text(L"l");
+}
+
+@h Parsing Preform.
+The syntax of the |Syntax.preform| file is, fortunately, very simple. At any given
+time, we are parsing definitions for a given natural language |L|: for example,
+English.
+
+Note that Preform can contain comments in square brackets; but that the Lexer
+has already removed any such.
+
+=
+int LoadPreform::parse(wording W, NATURAL_LANGUAGE_WORDS_TYPE *L) {
+ NATURAL_LANGUAGE_WORDS_TYPE *current_natural_language = L;
+ int nonterminals_declared = 0;
+ LOOP_THROUGH_WORDING(wn, W) {
+ if (Lexer::word(wn) == PARBREAK_V) continue;
+ if ((Wordings::last_wn(W) >= wn+1) && (Lexer::word(wn) == language_V))
+ @
+ else if ((Wordings::last_wn(W) >= wn+1) && (Lexer::word(wn+1) == internal_V))
+ @
+ else if ((Wordings::last_wn(W) >= wn+2) && (Lexer::word(wn+1) == COLONCOLONEQUALS_V))
+ @
+ else
+ internal_error("syntax error in Preform declarations");
+ }
+ Optimiser::optimise_counts();
+ return nonterminals_declared;
+}
+
+@ We either switch to an existing natural language, or create a new one.
+
+@ =
+ TEMPORARY_TEXT(lname);
+ WRITE_TO(lname, "%W", Wordings::one_word(wn+1));
+ NATURAL_LANGUAGE_WORDS_TYPE *nl = NULL;
+ #ifdef PREFORM_LANGUAGE_FROM_NAME_WORDS_CALLBACK
+ nl = PREFORM_LANGUAGE_FROM_NAME_WORDS_CALLBACK(lname);
+ #endif
+ if (nl == NULL) {
+ LOG("Missing: %S\n", lname);
+ internal_error("tried to define for missing language");
+ }
+ DISCARD_TEXT(lname);
+ current_natural_language = nl;
+ wn++;
+
+@ Internal declarations appear as single lines in |Syntax.preform|.
+
+@ =
+ nonterminal *nt = Nonterminals::find(Lexer::word(wn));
+ if (nt->first_production_list) internal_error("internal is defined");
+ nt->marked_internal = TRUE;
+ wn++;
+ nonterminals_declared++;
+
+@ Regular declarations are much longer and continue until the end of the text,
+or until we reach a paragraph break. The body of such a declaration is a list
+of productions divided by stroke symbols.
+
+@ =
+ nonterminal *nt = Nonterminals::find(Lexer::word(wn));
+ production_list *pl;
+ @;
+ wn += 2;
+ int pc = 0;
+ while (TRUE) {
+ int x = wn;
+ while ((x <= Wordings::last_wn(W)) && (Lexer::word(x) != STROKE_V) &&
+ (Lexer::word(x) != PARBREAK_V)) x++;
+ if (wn < x) {
+ production *pr = LoadPreform::new_production(Wordings::new(wn, x-1), nt, pc++);
+ wn = x;
+ @;
+ }
+ if ((wn > Wordings::last_wn(W)) || (Lexer::word(x) == PARBREAK_V)) break; /* reached end */
+ wn++; /* advance past the stroke and continue */
+ }
+ wn--;
+ nonterminals_declared++;
+
+@ =
+ for (pl = nt->first_production_list; pl; pl = pl->next_production_list)
+ if (pl->definition_language == current_natural_language)
+ break;
+ if (pl == NULL) {
+ pl = CREATE(production_list);
+ pl->definition_language = current_natural_language;
+ pl->first_production = NULL;
+ pl->as_avinue = NULL;
+ @;
+ }
+
+@ =
+ if (nt->first_production_list == NULL) nt->first_production_list = pl;
+ else {
+ production_list *p = nt->first_production_list;
+ while ((p) && (p->next_production_list)) p = p->next_production_list;
+ p->next_production_list = pl;
+ }
+
+@ =
+ if (pl->first_production == NULL) pl->first_production = pr;
+ else {
+ production *p = pl->first_production;
+ while ((p) && (p->next_production)) p = p->next_production;
+ p->next_production = pr;
+ }
+
+
+
+
+@
@d MAX_RESULTS_PER_PRODUCTION 10
-@d INFINITE_WORD_COUNT 1000000000
-
-=
-typedef struct nonterminal {
- struct vocabulary_entry *nonterminal_id; /* e.g. |""| */
- int voracious; /* if true, scans whole rest of word range */
- int multiplicitous;
-
- int marked_internal; /* has, or will be given, an internal definition... */
- int (*internal_definition)(wording W, int *result, void **result_p); /* ...this one */
-
- struct production_list *first_production_list; /* if not internal, this defines it */
-
- int (*result_compositor)(int *r, void **rp, int *inters, void **inter_ps, wording *interW, wording W);
-
- struct wording range_result[MAX_RANGES_PER_PRODUCTION]; /* storage for word ranges matched */
-
- int optimised_in_this_pass; /* have the following been worked out yet? */
- int min_nt_words, max_nt_words; /* for speed */
- struct range_requirement nonterminal_req;
- int nt_req_bit; /* which hashing category the words belong to, or $-1$ if none */
-
- int number_words_by_production;
- unsigned int flag_words_in_production;
-
- int watched; /* watch goings-on to the debugging log */
- int nonterminal_tries; /* used only in instrumented mode */
- int nonterminal_matches; /* ditto */
- CLASS_DEFINITION
-} nonterminal;
@ Each (external) nonterminal is then defined by lists of productions:
potentially one for each language, though only English is required to define
@@ -262,20 +347,6 @@ typedef struct ptoken {
CLASS_DEFINITION
} ptoken;
-@ The parser records the result of the most recently matched nonterminal in the
-following global variables:
-
-=
-int most_recent_result = 0; /* this is the variable which |inweb| writes |<>| */
-void *most_recent_result_p = NULL; /* this is the variable which |inweb| writes |<