diff --git a/docs/if-module/1-im.html b/docs/if-module/1-im.html
index e4c9788ac..af920405d 100644
--- a/docs/if-module/1-im.html
+++ b/docs/if-module/1-im.html
@@ -73,7 +73,7 @@ function togglePopup(material_id) {
 COMPILE_WRITER(action_pattern *, ActionPatterns::log)
 COMPILE_WRITER(command_grammar *, CommandGrammars::log)
 COMPILE_WRITER(cg_line *, CGLines::log)
-COMPILE_WRITER(cg_token *, CGTokens::log)
+COMPILE_WRITER(cg_token *, CGTokens::log)
 COMPILE_WRITER(action_name_list *, ActionNameLists::log)
 COMPILE_WRITER(anl_entry *, ActionNameLists::log_entry)
 COMPILE_WRITER(action_name *, ActionNameNames::log)
@@ -118,7 +118,7 @@ function togglePopup(material_id) {
     REGISTER_WRITER('A', ActionPatterns::log);
-    REGISTER_WRITER('c', CGTokens::log);
+    REGISTER_WRITER('c', CGTokens::log);
     REGISTER_WRITER('G', CommandGrammars::log);
     REGISTER_WRITER('g', CGLines::log);
     REGISTER_WRITER('L', ActionNameLists::log);
diff --git a/docs/if-module/5-cg.html b/docs/if-module/5-cg.html
index 6cfc2b4ca..6b8470502 100644
--- a/docs/if-module/5-cg.html
+++ b/docs/if-module/5-cg.html
@@ -334,7 +334,7 @@ token called "[suitable colour]".
     return cg;
 }
 
-command_grammar *CommandGrammars::named_token_by_name(wording W) {
+command_grammar *CommandGrammars::named_token_by_name(wording W) {
     command_grammar *cg;
     LOOP_OVER(cg, command_grammar)
         if ((cg->cg_is == CG_IS_TOKEN) && (Wordings::match(W, cg->token_name)))
@@ -506,7 +506,7 @@ This returns the kind if so, or 
-kind *CommandGrammars::get_kind_matched(command_grammar *cg) {
+kind *CommandGrammars::get_kind_matched(command_grammar *cg) {
     return DeterminationTypes::get_single_kind(&(cg->cg_type));
 }
 
@@ -548,7 +548,7 @@ recurses back here.

-parse_node *CommandGrammars::determine(command_grammar *cg, int depth) {
+parse_node *CommandGrammars::determine(command_grammar *cg, int depth) {
     current_sentence = cg->where_cg_created;
     If this CG produces a value we have determined already, return that20.1;
     If recursion went impossibly deep, the CG grammar must be ill-founded20.2;
diff --git a/docs/if-module/5-cgl.html b/docs/if-module/5-cgl.html
index 0856547dc..b27c6189f 100644
--- a/docs/if-module/5-cgl.html
+++ b/docs/if-module/5-cgl.html
@@ -361,10 +361,10 @@ detected by the following function:
     if ((cgl->tokens)
         && (cgl->tokens->next_token == NULL)
         && (cgl->tokens->slash_class == 0)
-        && (CGTokens::is_literal(cgl->tokens))
+        && (CGTokens::is_literal(cgl->tokens))
         && (cgl->pluralised == FALSE)
         && (CGLines::conditional(cgl) == FALSE))
-        return Wordings::first_wn(CGTokens::text(cgl->tokens));
+        return Wordings::first_wn(CGTokens::text(cgl->tokens));
     return -1;
 }
 
@@ -432,8 +432,8 @@ definition no effect, and disappears without trace in this process.
     int alternatives_group = 0;
     cg_token *class_start = NULL;
     LOOP_THROUGH_CG_TOKENS(cgt, cgl) {
-        if ((cgt->next_token) && (Wordings::length(CGTokens::text(cgt->next_token)) == 1) &&
-            (Lexer::word(Wordings::first_wn(CGTokens::text(cgt->next_token))) ==
+        if ((cgt->next_token) && (Wordings::length(CGTokens::text(cgt->next_token)) == 1) &&
+            (Lexer::word(Wordings::first_wn(CGTokens::text(cgt->next_token))) ==
             FORWARDSLASH_V)) {
             if (cgt->slash_class == 0) {
                 class_start = cgt; alternatives_group++;  start new equiv class
@@ -443,8 +443,8 @@ definition no effect, and disappears without trace in this process.
             if (cgt->next_token->next_token)
                 cgt->next_token->next_token->slash_class = alternatives_group;
             if ((cgt->next_token->next_token) &&
-                (Wordings::length(CGTokens::text(cgt->next_token->next_token)) == 1) &&
-                (Lexer::word(Wordings::first_wn(CGTokens::text(cgt->next_token->next_token))) ==
+                (Wordings::length(CGTokens::text(cgt->next_token->next_token)) == 1) &&
+                (Lexer::word(Wordings::first_wn(CGTokens::text(cgt->next_token->next_token))) ==
                 DOUBLEDASH_V)) {
                 class_start->slash_dash_dash = TRUE;
                 cgt->next_token = cgt->next_token->next_token->next_token;  excise both
@@ -461,7 +461,7 @@ definition no effect, and disappears without trace in this process.
     LOOP_THROUGH_CG_TOKENS(cgt, cgl)
         if ((cgt->slash_class > 0) &&
-            (CGTokens::is_literal(cgt) == FALSE)) {
+            (CGTokens::is_literal(cgt) == FALSE)) {
             StandardProblems::sentence_problem(Task::syntax_tree(),
                 _p_(PM_OverAmbitiousSlash),
                 "the slash '/' can only be used between single literal words",
@@ -593,7 +593,7 @@ little bit more than a "[text]" later.
 
 

 For \(R=10\), the following might thus happen. (I've simplified this table by
 having the individual tokens all score 1, but in fact they can score a range
-of small numbers: see CGTokens::score_bonus.)
+of small numbers: see CGTokens::score_bonus.)
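
To make the weighting concrete, here is a minimal standalone sketch of the arithmetic (an editorial illustration, not part of the patch), assuming the R of the table is CGL_SCORE_TOKEN_RANGE = 10, that literal tokens determine to NULL and so contribute nothing, and that the first value-describing token is weighted by R while later ones are weighted by 1, as in the general_sort_bonus code further down:

    #include <stdio.h>
    int main(void) {
        /* tokens of a line such as "put [something] in [something]":
           score 0 marks a literal token, 1 the simplified token score */
        int scores[] = { 0, 1, 0, 1 };
        int R = 10, bonus = 0, values_seen = 0;
        for (int i = 0; i < 4; i++) {
            if (scores[i] == 0) continue;               /* literal: no contribution */
            int multiplier = (values_seen++ == 0) ? R : 1;
            bonus += scores[i] * multiplier;            /* mirrors score*score_multiplier */
        }
        printf("general_sort_bonus = %d\n", bonus);     /* 1*10 + 1*1 = 11 */
        return 0;
    }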

@@ -616,23 +616,23 @@ parsing the player's command at run-time. For the exact sorting rules, see below
 
     int nulls_count = 0, pos = 0;
     for (cg_token *cgt = first; cgt; cgt = cgt->next_token) {
-        parse_node *spec = CGTokens::determine(cgt, depth);
-        int score = CGTokens::score_bonus(cgt);
+        parse_node *spec = CGTokens::determine(cgt, depth);
+        int score = CGTokens::score_bonus(cgt);
         if ((score < 0) || (score >= CGL_SCORE_TOKEN_RANGE))
             internal_error("token score out of range");
         LOGIF(GRAMMAR_CONSTRUCTION, "token %d/%d: <%W> --> $P (score %d)\n",
-            pos+1, line_length, CGTokens::text(cgt), spec, score);
+            pos+1, line_length, CGTokens::text(cgt), spec, score);
         if (spec) {
             Text tokens contribute also to the understanding sort bonus16.1.1;
             int score_multiplier = 1;
             if (DeterminationTypes::get_no_values_described(&(cgl->cgl_type)) == 0)
                 score_multiplier = CGL_SCORE_TOKEN_RANGE;
             DeterminationTypes::add_term(&(cgl->cgl_type), spec,
-                CGTokens::is_multiple(cgt));
+                CGTokens::is_multiple(cgt));
             cgl->general_sort_bonus += score*score_multiplier;
         } else nulls_count++;
 
-        if (CGTokens::is_multiple(cgt)) multiples++;
+        if (CGTokens::is_multiple(cgt)) multiples++;
         pos++;
     }
     if (nulls_count == line_length)
@@ -695,8 +695,8 @@ which parses to a K_understanding
     }
 
     for (cg_token *cgt = first; cgt; cgt = cgt->next_token) {
-        if ((CGTokens::is_topic(cgt)) && (cgt->next_token) &&
-            (CGTokens::is_literal(cgt->next_token) == FALSE)) {
+        if ((CGTokens::is_topic(cgt)) && (cgt->next_token) &&
+            (CGTokens::is_literal(cgt->next_token) == FALSE)) {
             StandardProblems::sentence_problem(Task::syntax_tree(), _p_(PM_TextFollowedBy),
                 "a '[text]' token must either match the end of some text, or be followed "
                 "by definitely known wording",
@@ -719,7 +719,7 @@ which parses to a K_understanding
         for (cg_token *token = cgl->tokens; token; token = token->next_token) {
             int code_mode = TRUE; if (cg->cg_is == CG_IS_COMMAND) code_mode = FALSE;
             int consult_mode = (cg->cg_is == CG_IS_CONSULT)?TRUE:FALSE;
-            kind *K = CGTokens::verify_and_find_kind(token, code_mode, consult_mode);
+            kind *K = CGTokens::verify_and_find_kind(token, code_mode, consult_mode);
             if (K) {
                 if (token_values == 2) internal_error("too many value-producing tokens");
                 token_value_kinds[token_values++] = K;
@@ -920,7 +920,7 @@ the specificity of the tokens is what decides. The first token is more important
 than the second, and a more specific token comes before a lower one.
 

-
-See CGTokens::determine for how the score of an individual token
+
+See CGTokens::determine for how the score of an individual token
 is worked out.

diff --git a/docs/if-module/5-cgt.html b/docs/if-module/5-cgt.html
index fd5f1a432..206a8ee87 100644
--- a/docs/if-module/5-cgt.html
+++ b/docs/if-module/5-cgt.html
@@ -60,7 +60,7 @@ function togglePopup(material_id) {

 CGs are lists of CG lines, which are lists of CG tokens.

-
+

 §1. Introduction. Until 2021, CG tokens were held as parse nodes in the syntax
 tree, with a special type TOKEN_NT and a set of annotations, but as cute as that was
@@ -82,7 +82,7 @@ object as follows:
     CLASS_DEFINITION
 } cg_token;
 
-cg_token *CGTokens::cgt_of(wording W, int lit) {
+cg_token *CGTokens::cgt_of(wording W, int lit) {
     cg_token *cgt = CREATE(cg_token);
     cgt->text_of_token = W;
     cgt->slash_dash_dash = FALSE;
@@ -118,25 +118,13 @@ mark: thus "get away/off/out" becomes

define GRAMMAR_PUNCTUATION_MARKS L".,:;?!(){}[]/"  note the slash
 
-cg_token *CGTokens::tokenise(wording W) {
-    wchar_t *as_wide_string = Lexer::word_text(Wordings::first_wn(W));
+wording CGTokens::break(wchar_t *text, int expand) {
     Reject this if it contains punctuation2.1;
-    wording TW = Feeds::feed_C_string_full(as_wide_string, TRUE,
-        GRAMMAR_PUNCTUATION_MARKS);
+    wording TW = Feeds::feed_C_string_full(text, expand,
+        GRAMMAR_PUNCTUATION_MARKS, TRUE);
     Reject this if it contains two consecutive commas2.2;
-
-    cg_token *tokens = CGTokens::break_into_tokens(TW);
-    if (tokens == NULL) {
-        StandardProblems::sentence_problem(Task::syntax_tree(),
-            _p_(PM_UnderstandEmptyText),
-            "'understand' should be followed by text which contains at least "
-            "one word or square-bracketed token",
-            "so for instance 'understand \"take [something]\" as taking' is fine, "
-            "but 'understand \"\" as the fog' is not. The same applies to the contents "
-            "of 'topic' columns in tables, since those are also instructions for "
-            "understanding.");
-    }
-    return tokens;
+    Reject this if it slashes off a numerical word2.3;
+    return TW;
 }
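
A small illustration (an editorial sketch, not from the patch; the call and its arguments are hypothetical): since '/' is one of the GRAMMAR_PUNCTUATION_MARKS, a feed through this function gives each slash a word of its own, which is how "get away/off/out" becomes "get away / off / out".

    wording TW = CGTokens::break(L"get away/off/out", TRUE);
    /* TW now holds six words: get, away, /, off, /, out */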
 

 §2.1. Reject this if it contains punctuation2.1 =
@@ -144,13 +132,13 @@ mark: thus "get away/off/out" becomes

     int skip = FALSE, literal_punct = FALSE;
-    for (int i=0; as_wide_string[i]; i++) {
-        if (as_wide_string[i] == '[') skip = TRUE;
-        if (as_wide_string[i] == ']') skip = FALSE;
+    for (int i=0; text[i]; i++) {
+        if (text[i] == '[') skip = TRUE;
+        if (text[i] == ']') skip = FALSE;
         if (skip) continue;
-        if ((as_wide_string[i] == '.') || (as_wide_string[i] == ',') ||
-            (as_wide_string[i] == '!') || (as_wide_string[i] == '?') ||
-            (as_wide_string[i] == ':') || (as_wide_string[i] == ';'))
+        if ((text[i] == '.') || (text[i] == ',') ||
+            (text[i] == '!') || (text[i] == '?') ||
+            (text[i] == ':') || (text[i] == ';'))
             literal_punct = TRUE;
     }
     if (literal_punct) {
@@ -159,7 +147,7 @@ mark: thus "get away/off/out" becomes
             "or more specifically cannot contain any of these: . , ! ? : ; since they "
             "are already used in various ways by the parser, and would not correctly "
             "match here.");
-        return NULL;
+        return EMPTY_WORDING;
     }
 
  • This code is used in §2.
@@ -182,11 +170,62 @@ mark: thus "get away/off/out" becomes
             "brackets, this problem message is also sometimes seen "
             "if empty square brackets are used, as in 'Understand "
             "\"bless []\" as blessing.'");
-        return NULL;
+        return EMPTY_WORDING;
     }
-
-§3. The following tiny Preform grammar is then used to break up the resulting
+
+§2.3. Reject this if it slashes off a numerical word2.3 =
+
+    LOOP_THROUGH_WORDING(i, TW)
+        if (Lexer::word(i) == FORWARDSLASH_V)
+            if (((i < Wordings::last_wn(TW)) && (CGTokens::numerical(i+1))) ||
+                ((i > Wordings::first_wn(TW)) && (CGTokens::numerical(i-1)))) {
+                StandardProblems::sentence_problem(Task::syntax_tree(),
+                    _p_(PM_SlashCutsDigits),
+                    "'understand' uses a slash '/' here in a way which cuts off something "
+                    "which contains only digits",
+                    "and this will not do anything good. (Note that a slash in grammar "
+                    "like this means an alternative choice of word.)");
+                return EMPTY_WORDING;
+            }
+
+§3.
+
+int CGTokens::numerical(int wn) {
+    wchar_t *text = Lexer::word_text(wn);
+    for (int i=0; i<Wide::len(text); i++)
+        if (Characters::isdigit(text[i]) == FALSE)
+            return FALSE;
+    return TRUE;
+}
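
A brief walk-through (an editorial sketch using the wording of the new PM_SlashCutsDigits test case; the call itself is hypothetical):

    wording TW = CGTokens::break(L"new/-- br80/08/-- camera", TRUE);
    /* With slash-breaking on, the feed yields: new / -- br80 / 08 / -- camera.
       The slash before "08" has a neighbouring word made only of digits, so
       CGTokens::numerical reports TRUE for it, the problem of 2.3 is issued,
       and the caller receives EMPTY_WORDING. */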
+
+§4. And here the result becomes a token list:
+
+cg_token *CGTokens::tokenise(wording W) {
+    wchar_t *as_wide_string = Lexer::word_text(Wordings::first_wn(W));
+    wording TW = CGTokens::break(as_wide_string, TRUE);
+    cg_token *tokens = CGTokens::break_into_tokens(TW);
+    if (Wordings::empty(TW)) return NULL;
+    if (tokens == NULL) {
+        StandardProblems::sentence_problem(Task::syntax_tree(),
+            _p_(PM_UnderstandEmptyText),
+            "'understand' should be followed by text which contains at least "
+            "one word or square-bracketed token",
+            "so for instance 'understand \"take [something]\" as taking' is fine, "
+            "but 'understand \"\" as the fog' is not. The same applies to the contents "
+            "of 'topic' columns in tables, since those are also instructions for "
+            "understanding.");
+    }
+    return tokens;
+}
+
+§5. The following tiny Preform grammar is then used to break up the resulting
 text at commas:

@@ -197,13 +236,13 @@ text at commas:
     ... ==> { FALSE, - }
-
-§4. The following function takes a wording and turns it into a linked list of
+
+§6. The following function takes a wording and turns it into a linked list of
 CG tokens, divided by commas:

-cg_token *CGTokens::break_into_tokens(wording W) {
-    return CGTokens::break_into_tokens_r(NULL, W);
+cg_token *CGTokens::break_into_tokens(wording W) {
+    return CGTokens::break_into_tokens_r(NULL, W);
 }
 cg_token *CGTokens::break_into_tokens_r(cg_token *list, wording W) {
     <grammar-token-breaking>(W);
@@ -211,35 +250,34 @@ CG tokens, divided by commas:
         case NOT_APPLICABLE: {
             wording LW = GET_RW(<grammar-token-breaking>, 1);
             wording RW = GET_RW(<grammar-token-breaking>, 2);
-            list = CGTokens::break_into_tokens_r(list, LW);
-            list = CGTokens::break_into_tokens_r(list, RW);
+            list = CGTokens::break_into_tokens_r(list, LW);
+            list = CGTokens::break_into_tokens_r(list, RW);
             break;
         }
         case TRUE:
             Word::dequote(Wordings::first_wn(W));
             if (*(Lexer::word_text(Wordings::first_wn(W))) == 0) return list;
-            W = Feeds::feed_C_string_full(Lexer::word_text(Wordings::first_wn(W)),
-                FALSE, GRAMMAR_PUNCTUATION_MARKS);
+            W = CGTokens::break(Lexer::word_text(Wordings::first_wn(W)), FALSE);
             LOOP_THROUGH_WORDING(i, W) {
                 cg_token *cgt = CGTokens::cgt_of(Wordings::one_word(i), TRUE);
-                list = CGTokens::add_to_list(cgt, list);
+                list = CGTokens::add_to_list(cgt, list);
             }
             break;
         case FALSE: {
             cg_token *cgt = CGTokens::cgt_of(W, FALSE);
-            list = CGTokens::add_to_list(cgt, list);
+            list = CGTokens::add_to_list(cgt, list);
             break;
         }
     }
     return list;
 }
 
-
-§5. If list represents the head of the list (and is NULL for an empty list),
+
+§7. If list represents the head of the list (and is NULL for an empty list),
 this adds cgt at the end and returns the new head.

-cg_token *CGTokens::add_to_list(cg_token *cgt, cg_token *list) {
+cg_token *CGTokens::add_to_list(cg_token *cgt, cg_token *list) {
     if (list == NULL) return cgt;
     if (cgt == NULL) return list;
     cg_token *x = list;
@@ -248,19 +286,19 @@ this adds cgt a
     return list;
 }
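
For example (an editorial sketch; W1 and W2 are hypothetical wordings for a literal word such as "take" and a bracketed token such as "[something]"), a caller builds a line's token list in order like so:

    cg_token *list = NULL;
    list = CGTokens::add_to_list(CGTokens::cgt_of(W1, TRUE), list);   /* literal word */
    list = CGTokens::add_to_list(CGTokens::cgt_of(W2, FALSE), list);  /* non-literal token */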
 
-
-§6. As the above shows, the text of a token is not necessarily a single word,
+
+§8. As the above shows, the text of a token is not necessarily a single word,
 unless it's a literal.

-wording CGTokens::text(cg_token *cgt) {
+wording CGTokens::text(cg_token *cgt) {
     return cgt?(cgt->text_of_token):(EMPTY_WORDING);
 }
 
-
-§7. The GTC. The GTC, or grammar token code, is a sort of type indicator for tokens. As
+
+§9. The GTC. The GTC, or grammar token code, is a sort of type indicator for tokens. As
 produced by the tokeniser above, tokens initially have GTC either UNDETERMINED_GTC
 or LITERAL_GTC. Differentiation of non-literal tokens into other types happens
-in CGTokens::determine.
+in CGTokens::determine.

 Note that there are two sets of GTC values, one set positive, one negative. The
@@ -286,7 +324,7 @@ generating I6 code.
 define MULTIEXCEPT_TOKEN_GTC -8  like I6 multiexcept

-int CGTokens::is_literal(cg_token *cgt) {
+int CGTokens::is_literal(cg_token *cgt) {
     if ((cgt) && (cgt->grammar_token_code == LITERAL_GTC)) return TRUE;
     return FALSE;
 }
@@ -296,18 +334,18 @@ generating I6 code.
     return FALSE;
 }
 
-int CGTokens::is_topic(cg_token *cgt) {
+int CGTokens::is_topic(cg_token *cgt) {
     if ((cgt) && (cgt->grammar_token_code == TOPIC_TOKEN_GTC)) return TRUE;
     return FALSE;
 }
 
-
-§8. A multiple token is one which permits multiple matches in the run-time command
+
+§10. A multiple token is one which permits multiple matches in the run-time command
 parser: for instance, the player can type ALL where a MULTI_TOKEN_GTC is
 expected.

-int CGTokens::is_multiple(cg_token *cgt) {
+int CGTokens::is_multiple(cg_token *cgt) {
     switch (cgt->grammar_token_code) {
         case MULTI_TOKEN_GTC:
         case MULTIINSIDE_TOKEN_GTC:
@@ -318,10 +356,10 @@ expected.
     return FALSE;
 }
 
-
-§9. Logging.
+
+§11. Logging.

-void CGTokens::log(cg_token *cgt) {
+void CGTokens::log(cg_token *cgt) {
     if (cgt == NULL) LOG("<no-cgt>");
     else {
         LOG("<CGT%d:%W", cgt->allocation_id, cgt->text_of_token);
@@ -346,7 +384,7 @@ expected.
     }
 }
 
-
-§10. Parsing nonliteral tokens. Unless a token is literal and in double-quotes, it will start out as having
+
+§12. Parsing nonliteral tokens. Unless a token is literal and in double-quotes, it will start out as having
 UNDETERMINED_GTC until we investigate what the words in it mean, which we
 will do with the following Preform grammar.

@@ -358,21 +396,21 @@ return pointer is a (non-null) description of what the token matches.
 <grammar-token> ::=
-    <named-grammar-token> |       ==> Apply the command grammar10.1
+    <named-grammar-token> |       ==> Apply the command grammar12.1
     any things |                  ==> { ANY_THINGS_GTC, Specifications::from_kind(K_thing) }
     any <s-description> |         ==> { ANY_STUFF_GTC, RP[1] }
     anything |                    ==> { ANY_STUFF_GTC, Specifications::from_kind(K_thing) }
     anybody |                     ==> { ANY_STUFF_GTC, Specifications::from_kind(K_person) }
     anyone |                      ==> { ANY_STUFF_GTC, Specifications::from_kind(K_person) }
     anywhere |                    ==> { ANY_STUFF_GTC, Specifications::from_kind(K_room) }
-    something related by reversed <relation-name> |   ==> Apply the reversed relation10.2
-    something related by <relation-name> |            ==> Apply the relation10.3
-    something related by ... |    ==> Issue PM_GrammarBadRelation problem10.4
+    something related by reversed <relation-name> |   ==> Apply the reversed relation12.2
+    something related by <relation-name> |            ==> Apply the relation12.3
+    something related by ... |    ==> Issue PM_GrammarBadRelation problem12.4
     <standard-grammar-token> |    ==> { pass 1 }
     <definite-article> <k-kind> | ==> { STUFF_GTC, Specifications::from_kind(RP[2]) }
     <s-description> |             ==> { STUFF_GTC, RP[1] }
-    <s-type-expression> |         ==> Issue PM_BizarreToken problem10.9
-    ...                           ==> Issue PM_UnknownToken problem10.10
+    <s-type-expression> |         ==> Issue PM_BizarreToken problem12.9
+    ...                           ==> Issue PM_UnknownToken problem12.10
 
 <standard-grammar-token> ::=
     something |                 ==> { NOUN_TOKEN_GTC, Specifications::from_kind(K_object) }
@@ -384,12 +422,12 @@ return pointer is a (non-null) description of what the token matches.
     someone |                   ==> { CREATURE_TOKEN_GTC, Specifications::from_kind(K_object) }
     somebody |                  ==> { CREATURE_TOKEN_GTC, Specifications::from_kind(K_object) }
     text |                      ==> { TOPIC_TOKEN_GTC, Specifications::from_kind(K_understanding) }
-    topic |                     ==> Issue PM_UseTextNotTopic problem10.5
-    a topic |                   ==> Issue PM_UseTextNotTopic problem10.5
-    object |                    ==> Issue PM_UseThingNotObject problem10.6
-    an object |                 ==> Issue PM_UseThingNotObject problem10.6
-    something held |            ==> Issue something held problem message10.7
-    things held                 ==> Issue things held problem message10.8
+    topic |                     ==> Issue PM_UseTextNotTopic problem12.5
+    a topic |                   ==> Issue PM_UseTextNotTopic problem12.5
+    object |                    ==> Issue PM_UseThingNotObject problem12.6
+    an object |                 ==> Issue PM_UseThingNotObject problem12.6
+    something held |            ==> Issue something held problem message12.7
+    things held                 ==> Issue things held problem message12.8
 
 <named-grammar-token> internal {
     command_grammar *cg = CommandGrammars::named_token_by_name(W);
@@ -401,28 +439,28 @@ return pointer is a (non-null) description of what the token matches.
 }
 
-
-§10.1. Apply the command grammar10.1 =
+
+§12.1. Apply the command grammar12.1 =

     ==> { NAMED_TOKEN_GTC, ParsingPlugin::rvalue_from_command_grammar(RP[1]) }
 
-
-§10.2. Apply the reversed relation10.2 =
+
+§12.2. Apply the reversed relation12.2 =

     ==> { RELATED_GTC, Rvalues::from_binary_predicate(BinaryPredicates::get_reversal(RP[1])) }
 
-
-§10.3. Apply the relation10.3 =
+
+§12.3. Apply the relation12.3 =

     ==> { RELATED_GTC, Rvalues::from_binary_predicate(RP[1]) }
 
-
-§10.4. Issue PM_GrammarBadRelation problem10.4 =
+
+§12.4. Issue PM_GrammarBadRelation problem12.4 =

@@ -436,8 +474,8 @@ return pointer is a (non-null) description of what the token matches.
     Problems::issue_problem_end();
     ==> { RELATED_GTC, Rvalues::from_binary_predicate(R_equality) }
 
-
-§10.5. Issue PM_UseTextNotTopic problem10.5 =
+
+§12.5. Issue PM_UseTextNotTopic problem12.5 =

@@ -457,8 +495,8 @@ return pointer is a (non-null) description of what the token matches.
     Problems::issue_problem_end();
     ==> { TOPIC_TOKEN_GTC, Specifications::from_kind(K_understanding) };
 
-
-§10.6. Issue PM_UseThingNotObject problem10.6 =
+
+§12.6. Issue PM_UseThingNotObject problem12.6 =

@@ -473,26 +511,26 @@ return pointer is a (non-null) description of what the token matches.
     Problems::issue_problem_end();
     ==> { MULTI_TOKEN_GTC, Specifications::from_kind(K_object) }
 
-
-§10.7. Issue something held problem message10.7 =
+
+§12.7. Issue something held problem message12.7 =

-    CGTokens::incompatible_change_problem(
+    CGTokens::incompatible_change_problem(
         "something held", "something", "something preferably held");
     ==> { HELD_TOKEN_GTC, Specifications::from_kind(K_object) }
 
-
-§10.8. Issue things held problem message10.8 =
+
+§12.8. Issue things held problem message12.8 =

-    CGTokens::incompatible_change_problem(
+    CGTokens::incompatible_change_problem(
             "things held", "things", "things preferably held");
     ==> { MULTIHELD_TOKEN_GTC, Specifications::from_kind(K_object) }
 
-
-§10.9. Issue PM_BizarreToken problem10.9 =
+
+§12.9. Issue PM_BizarreToken problem12.9 =

@@ -508,8 +546,8 @@ return pointer is a (non-null) description of what the token matches.
     Problems::issue_problem_end();
     ==> { STUFF_GTC, Specifications::from_kind(K_thing) }
 
-
-§10.10. Issue PM_UnknownToken problem10.10 =
+
+§12.10. Issue PM_UnknownToken problem12.10 =

@@ -523,13 +561,13 @@ return pointer is a (non-null) description of what the token matches.
     Problems::issue_problem_end();
     ==> { STUFF_GTC, Specifications::from_kind(K_thing) }
 
-
-§11. Something of an extended mea culpa: but it had the desired effect, in
+
+§13. Something of an extended mea culpa: but it had the desired effect, in
 that nobody complained about what might have been a controversial change.

-void CGTokens::incompatible_change_problem(char *token_tried, char *token_instead,
+void CGTokens::incompatible_change_problem(char *token_tried, char *token_instead,
     char *token_better) {
     Problems::quote_source(1, current_sentence);
     Problems::quote_text(2, token_tried);
@@ -555,7 +593,7 @@ that nobody complained about what might have been a controversial change.
     Problems::issue_problem_end();
 }
 
-
-§12. Determining. To calculate a description of what is being described by a token, then, we
+
+§14. Determining. To calculate a description of what is being described by a token, then, we
 call the following function, which delegates to <grammar-token> above.

@@ -566,10 +604,10 @@ pointer returned by <grammar-token> is the result.

-parse_node *CGTokens::determine(cg_token *cgt, int depth) {
-    if (CGTokens::is_literal(cgt)) return NULL;
+parse_node *CGTokens::determine(cg_token *cgt, int depth) {
+    if (CGTokens::is_literal(cgt)) return NULL;
 
-    <grammar-token>(CGTokens::text(cgt));
+    <grammar-token>(CGTokens::text(cgt));
     cgt->grammar_token_code = <<r>>;
     parse_node *result = <<rp>>;
 
@@ -579,11 +617,11 @@ pointer returned by <grammar-token> is the result.
             result = CommandGrammars::determine(cgt->defined_by, depth+1);
             break;
         case ANY_STUFF_GTC:
-            Make sure the result is a description with one free variable12.1;
+            Make sure the result is a description with one free variable14.1;
             cgt->noun_filter = NounFilterTokens::new(result, TRUE, FALSE);
             break;
         case ANY_THINGS_GTC:
-            Make sure the result is a description with one free variable12.1;
+            Make sure the result is a description with one free variable14.1;
             cgt->noun_filter = NounFilterTokens::new(result, TRUE, TRUE);
             break;
         case RELATED_GTC:
@@ -593,26 +631,26 @@ pointer returned by <grammar-token> is the result.
             result = Specifications::from_kind(K);
             break;
         case STUFF_GTC:
-            Make sure the result is a description with one free variable12.1;
+            Make sure the result is a description with one free variable14.1;
             cgt->noun_filter = NounFilterTokens::new(result, FALSE, FALSE);
             break;
         default:
-            Node::set_text(result, CGTokens::text(cgt));
+            Node::set_text(result, CGTokens::text(cgt));
             break;
     }
 
-    if (result) Vet the grammar token determination for parsability at run-time12.2;
+    if (result) Vet the grammar token determination for parsability at run-time14.2;
     cgt->what_token_describes = result;
     return cgt->what_token_describes;
 }
 
-
-§12.1. If the token determines an actual constant value — as it can when it is a
+
+§14.1. If the token determines an actual constant value — as it can when it is a
 named token which always refers to a specific thing, for example — it is
 possible for result not to be a description. Otherwise, though, it has to
 be a description which is true or false for any given value, so:
 
-Make sure the result is a description with one free variable12.1 =
+Make sure the result is a description with one free variable14.1 =

@@ -625,8 +663,8 @@ be a description which is true or false for any given value, so:
         result = Specifications::from_kind(K_object);
     }
 
-
-§12.2. Vet the grammar token determination for parsability at run-time12.2 =
+
+§14.2. Vet the grammar token determination for parsability at run-time14.2 =

@@ -637,7 +675,7 @@ be a description which is true or false for any given value, so:
             (Kinds::eq(K, K_understanding) == FALSE) &&
             (RTKindConstructors::request_I6_GPR(K) == FALSE)) {
             Problems::quote_source(1, current_sentence);
-            Problems::quote_wording(2, CGTokens::text(cgt));
+            Problems::quote_wording(2, CGTokens::text(cgt));
             StandardProblems::handmade_problem(Task::syntax_tree(), _p_(PM_UnparsableKind));
             Problems::issue_problem_segment(
                 "The grammar token '%2' in the sentence %1 invites me to understand "
@@ -650,8 +688,8 @@ be a description which is true or false for any given value, so:
         }
     }
 
-
-§13. Scoring. This score is needed when sorting CG lines in order of applicability: see the
+
+§15. Scoring. This score is needed when sorting CG lines in order of applicability: see the
 discussion at CGLines::cgl_determine. The function must return a value
 which is at least 0 but strictly less than CGL_SCORE_TOKEN_RANGE. The
 general idea is that higher scores cause tokens to take precedence over lower
@@ -659,7 +697,7 @@ ones.

-int CGTokens::score_bonus(cg_token *cgt) {
+int CGTokens::score_bonus(cg_token *cgt) {
     if (cgt == NULL) internal_error("no cgt");
     if (cgt->grammar_token_code == UNDETERMINED_GTC) internal_error("undetermined");
     int gtc = cgt->grammar_token_code;
@@ -677,17 +715,17 @@ ones.
     return 1;
 }
 
-
-§14. Verification. This function checks that it's okay to compile the given token, and returns the
+
+§16. Verification. This function checks that it's okay to compile the given token, and returns the
 kind of value produced, if any is, or NULL if it isn't. The kind returned is
 not significant if a problem is generated.

-kind *CGTokens::verify_and_find_kind(cg_token *cgt, int code_mode, int consult_mode) {
-    if (CGTokens::is_literal(cgt)) return NULL;
+kind *CGTokens::verify_and_find_kind(cg_token *cgt, int code_mode, int consult_mode) {
+    if (CGTokens::is_literal(cgt)) return NULL;
 
     if (cgt->token_relation) {
-        CGTokens::verify_relation_token(cgt->token_relation, CGTokens::text(cgt));
+        CGTokens::verify_relation_token(cgt->token_relation, CGTokens::text(cgt));
         return NULL;
     }
 
@@ -701,7 +739,7 @@ not significant if a problem is generated.
 
     if (Descriptions::is_complex(spec)) {
         Problems::quote_source(1, current_sentence);
-        Problems::quote_wording(2, CGTokens::text(cgt));
+        Problems::quote_wording(2, CGTokens::text(cgt));
         StandardProblems::handmade_problem(Task::syntax_tree(), _p_(PM_OverComplexToken));
         Problems::issue_problem_segment(
             "The grammar you give in %1 contains a token which is just too complicated - "
@@ -710,7 +748,7 @@ not significant if a problem is generated.
         Problems::issue_problem_end();
     }
 
-    if ((consult_mode) && (CGTokens::is_topic(cgt)))
+    if ((consult_mode) && (CGTokens::is_topic(cgt)))
         StandardProblems::sentence_problem(Task::syntax_tree(),
             _p_(PM_TextTokenRestricted),
             "the '[text]' token is not allowed with 'matches' or in table columns",
@@ -723,14 +761,14 @@ not significant if a problem is generated.
     return NULL;
 }
 
-
-§15. Relational tokens are the hardest to cope with at runtime, not least because
+
+§17. Relational tokens are the hardest to cope with at runtime, not least because
 Inform has so many different implementations for different relations, and
 not every relation can legally be used. The following function polices that —
 either doing nothing (okay) or issuing exactly one problem message (not okay).

-void CGTokens::verify_relation_token(binary_predicate *bp, wording W) {
+void CGTokens::verify_relation_token(binary_predicate *bp, wording W) {
     if (bp == R_equality) {
         Problems::quote_source(1, current_sentence);
         Problems::quote_wording(2, W);
diff --git a/docs/if-module/5-pp.html b/docs/if-module/5-pp.html
index 27827a890..d19eaedbb 100644
--- a/docs/if-module/5-pp.html
+++ b/docs/if-module/5-pp.html
@@ -158,9 +158,9 @@ the following functions to handle its constant rvalues. These correspond to
     return FALSE;
 }
 
-parse_node *ParsingPlugin::rvalue_from_command_grammar(command_grammar *val) {
+parse_node *ParsingPlugin::rvalue_from_command_grammar(command_grammar *val) {
         CONV_FROM(command_grammar, K_understanding) }
-command_grammar *ParsingPlugin::rvalue_to_command_grammar(parse_node *spec) {
+command_grammar *ParsingPlugin::rvalue_to_command_grammar(parse_node *spec) {
         CONV_TO(command_grammar) }
 

 §5. A number of global variables are given special treatment by this plugin,
diff --git a/docs/if-module/5-us.html b/docs/if-module/5-us.html
index 522561603..2135d9144 100644
--- a/docs/if-module/5-us.html
+++ b/docs/if-module/5-us.html
@@ -409,9 +409,8 @@ integer result is 0 if no problems were thrown, or -1 if they were.

     ur_being_parsed.cg_result = CG_IS_TOKEN;
-    ur_being_parsed.token_text = Feeds::feed_C_string_full(
-        Lexer::word_text(Wordings::first_wn(ur_being_parsed.reference_text)),
-        TRUE, GRAMMAR_PUNCTUATION_MARKS);
+    ur_being_parsed.token_text = CGTokens::break(
+        Lexer::word_text(Wordings::first_wn(ur_being_parsed.reference_text)), TRUE);
 

 §10.6. Reverse10.6 =
@@ -1088,7 +1087,7 @@ and add it to a suitably chosen CG with
     int np = problem_count;
-    tokens = CGTokens::tokenise(W);
+    tokens = CGTokens::tokenise(W);
     if (problem_count > np) return;

@@ -1119,8 +1118,8 @@ and add it to a suitably chosen CG with
             break;
         case CG_IS_COMMAND: {
             wording command_W = EMPTY_WORDING;  implies the no verb verb
-            if (CGTokens::is_literal(tokens))
-                command_W = Wordings::first_word(CGTokens::text(tokens));
+            if (CGTokens::is_literal(tokens))
+                command_W = Wordings::first_word(CGTokens::text(tokens));
             LOGIF(GRAMMAR_CONSTRUCTION, "Add to command grammar of command '%W': ", command_W);
             cg = CommandGrammars::for_command_verb_creating(command_W);
             break;
diff --git a/docs/words-module/3-fds.html b/docs/words-module/3-fds.html
index aa42e2675..861a48edb 100644
--- a/docs/words-module/3-fds.html
+++ b/docs/words-module/3-fds.html
@@ -87,7 +87,7 @@ with the corresponding ID back again.
 wording Feeds::feed_C_string(wchar_t *text) {
-    return Feeds::feed_C_string_full(text, FALSE, NULL);
+    return Feeds::feed_C_string_full(text, FALSE, NULL, FALSE);
 }
 
 wording Feeds::feed_text(text_stream *text) {
@@ -95,7 +95,7 @@ with the corresponding ID back again.
 }
 
 wording Feeds::feed_C_string_expanding_strings(wchar_t *text) {
-    return Feeds::feed_C_string_full(text, TRUE, NULL);
+    return Feeds::feed_C_string_full(text, TRUE, NULL, FALSE);
 }
 
 wording Feeds::feed_text_expanding_strings(text_stream *text) {
@@ -112,8 +112,10 @@ function, written two ways:
 

-wording Feeds::feed_C_string_full(wchar_t *text, int expand, wchar_t *nonstandard) {
+wording Feeds::feed_C_string_full(wchar_t *text, int expand, wchar_t *nonstandard,
+    int break_at_slashes) {
     Set up the lexer4.1;
+    lexer_break_at_slashes = break_at_slashes;
     for (int i=0; text[i] != 0; i++) {
         int last_cr, cr, next_cr;
         if (i > 0) last_cr = text[i-1]; else last_cr = EOF;
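
A hedged sketch of what the new fourth argument changes (an editorial example; the punctuation string is written out here as a stand-in for the if-module's GRAMMAR_PUNCTUATION_MARKS): with break_at_slashes clear, a slash flanked by digits still glues into a single word exactly as before, and with it set the slash always ends the word.

    wording A = Feeds::feed_C_string_full(L"br80/08", FALSE, L".,:;?!(){}[]/", FALSE);
        /* one word, br80/08: the digit-slash-digit rule suppresses the break */
    wording B = Feeds::feed_C_string_full(L"br80/08", FALSE, L".,:;?!(){}[]/", TRUE);
        /* three words, br80 / 08: lexer_break_at_slashes forces the break */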
diff --git a/docs/words-module/3-lxr.html b/docs/words-module/3-lxr.html
index 2be1190f2..4fa0d9299 100644
--- a/docs/words-module/3-lxr.html
+++ b/docs/words-module/3-lxr.html
@@ -451,6 +451,7 @@ so that subsequent words are lexed as usual.
 int lexer_divide_strings_at_text_substitutions;  Break up text substitutions in quoted text
 int lexer_allow_I6_escapes;  Recognise (- and -)
 int lexer_wait_for_dashes;  Ignore all text until first ---- found
+int lexer_break_at_slashes;
 

 §17. Definition of punctuation. As we have seen, the question of whether something is a punctuation mark
 or not depends slightly on the context:
@@ -630,6 +631,7 @@ always being "off").
     lexer_punctuation_marks = STANDARD_PUNCTUATION_MARKS;
     lexer_divide_strings_at_text_substitutions = FALSE;
     lexer_allow_I6_escapes = TRUE;
+    lexer_break_at_slashes = FALSE;
     reset the internal states
     lxs_most_significant_space_char = '\n';  we imagine each lexer feed starting a new line
@@ -788,8 +790,8 @@ most common URLs glue up as single words.)
     if (Lexer::is_punctuation(cr)) space = TRUE;
     if ((space) && (lxs_literal_mode)) space = FALSE;
     if ((space) && (cr != '[') && (cr != ']')) {
-        if ((space) && (next_cr == '/')) space = FALSE;
-        if (space) {
+        if (next_cr == '/') space = FALSE;
+        else {
             int lc = 0, nc = 0;
             if (Characters::isdigit((wchar_t) last_cr)) lc = 1;
             if ((last_cr >= 'a') && (last_cr <= 'z')) lc = 2;
@@ -798,6 +800,7 @@ most common URLs glue up as single words.)
             if ((next_cr >= 'a') && (next_cr <= 'z')) nc = 2;
             if ((lc == 1) && (nc == 1)) space = FALSE;
             if ((cr == '.') && (lc > 0) && (nc > 0)) space = FALSE;
+            if ((lexer_break_at_slashes) && (cr == '/')) space = TRUE;
         }
     }
     if (space) {
diff --git a/docs/words-module/4-lp.html b/docs/words-module/4-lp.html
index 21b673bbe..f983eb70c 100644
--- a/docs/words-module/4-lp.html
+++ b/docs/words-module/4-lp.html
@@ -646,7 +646,7 @@ So, for example, AW
         if ((k > 0) && (p[k] == '/')) breakme = TRUE;
     }
 
-    if (breakme) AW = Feeds::feed_C_string_full(p, FALSE, L"/");  break only at slashes
+    if (breakme) AW = Feeds::feed_C_string_full(p, FALSE, L"/", FALSE);  break only at slashes

 §15.1.1. Intercept /a/ to /z/ and /aa/ to /zz/, which don't make ptokens at
diff --git a/inform7/Figures/timings-diagnostics.txt b/inform7/Figures/timings-diagnostics.txt
index 53d2242c6..5a5987fa5 100644
--- a/inform7/Figures/timings-diagnostics.txt
+++ b/inform7/Figures/timings-diagnostics.txt
@@ -1,33 +1,33 @@
 100.0% in inform7 run
-     71.7% in compilation to Inter
-         51.1% in //Sequence::undertake_queued_tasks//
-          4.4% in //MajorNodes::pre_pass//
-          3.6% in //MajorNodes::pass_1//
+     71.3% in compilation to Inter
+         50.8% in //Sequence::undertake_queued_tasks//
+          4.5% in //MajorNodes::pre_pass//
+          3.5% in //MajorNodes::pass_1//
           1.9% in //ImperativeDefinitions::assess_all//
           1.5% in //RTKindConstructors::compile//
-          1.5% in //RTPhrasebook::compile_entries//
-          1.1% in //Sequence::lint_inter//
+          1.3% in //RTPhrasebook::compile_entries//
+          0.9% in //Sequence::lint_inter//
           0.5% in //MajorNodes::pass_2//
-          0.5% in //Sequence::undertake_queued_tasks//
           0.5% in //World::stage_V//
           0.3% in //ImperativeDefinitions::compile_first_block//
           0.3% in //Sequence::undertake_queued_tasks//
+          0.3% in //Sequence::undertake_queued_tasks//
           0.1% in //CompletionModule::compile//
           0.1% in //InferenceSubjects::emit_all//
           0.1% in //RTKindConstructors::compile_permissions//
           0.1% in //Task::make_built_in_kind_constructors//
           0.1% in //World::stages_II_and_III//
-          2.8% not specifically accounted for
-    25.5% in running Inter pipeline
-        10.2% in step 14/15: generate inform6 -> auto.inf
-         5.4% in step 6/15: make-synoptic-module
-         5.2% in step 5/15: load-binary-kits
-         1.5% in step 9/15: make-identifiers-unique
+          3.0% not specifically accounted for
+    25.6% in running Inter pipeline
+        10.1% in step 14/15: generate inform6 -> auto.inf
+         5.5% in step 5/15: load-binary-kits
+         5.3% in step 6/15: make-synoptic-module
+         1.3% in step 9/15: make-identifiers-unique
          0.3% in step 12/15: eliminate-redundant-operations
          0.3% in step 4/15: compile-splats
          0.3% in step 7/15: shorten-wiring
          0.3% in step 8/15: detect-indirect-calls
          0.1% in step 11/15: eliminate-redundant-labels
-         1.3% not specifically accounted for
-     2.3% in supervisor
+         1.4% not specifically accounted for
+     2.5% in supervisor
      0.4% not specifically accounted for
diff --git a/inform7/Tests/Test Problems/PM_SlashCutsDigits.txt b/inform7/Tests/Test Problems/PM_SlashCutsDigits.txt
new file mode 100644
index 000000000..c9361486e
--- /dev/null
+++ b/inform7/Tests/Test Problems/PM_SlashCutsDigits.txt
@@ -0,0 +1,5 @@
+Lab is a room.
+
+The camera is in the lab.
+
+Understand "new/-- br80/08/-- camera" as the camera.
diff --git a/inform7/Tests/Test Problems/_Results_Ideal/PM_SlashCutsDigits.txt b/inform7/Tests/Test Problems/_Results_Ideal/PM_SlashCutsDigits.txt
new file mode 100644
index 000000000..88bac33c5
--- /dev/null
+++ b/inform7/Tests/Test Problems/_Results_Ideal/PM_SlashCutsDigits.txt
@@ -0,0 +1,19 @@
+Inform 7 v10.2.0 has started.
+I've now read your source text, which is 17 words long.
+I've also read Basic Inform by Graham Nelson, which is 7691 words long.
+I've also read English Language by Graham Nelson, which is 2328 words long.
+I've also read Standard Rules by Graham Nelson, which is 32164 words long.
+Problem__ PM_SlashCutsDigits
+  >--> You wrote 'Understand "new/-- br80/08/-- camera" as the camera' (source
+    text, line 5): but 'understand' uses a slash '/' here in a way which cuts
+    off something which contains only digits, and this will not do anything
+    good. (Note that a slash in grammar like this means an alternative choice
+    of word.)
+Problem__ PM_UnderstandEmptyText
+  >--> You wrote 'Understand "new/-- br80/08/-- camera" as the camera' (source
+    text, line 5): but 'understand' should be followed by text which contains
+    at least one word or square-bracketed token, so for instance 'understand
+    "take [something]" as taking' is fine, but 'understand "" as the fog' is
+    not. The same applies to the contents of 'topic' columns in tables, since
+    those are also instructions for understanding.
+Inform 7 has finished.
diff --git a/inform7/if-module/Chapter 5/Command Grammar Tokens.w b/inform7/if-module/Chapter 5/Command Grammar Tokens.w
index 67fef1b1a..e6e233446 100644
--- a/inform7/if-module/Chapter 5/Command Grammar Tokens.w
+++ b/inform7/if-module/Chapter 5/Command Grammar Tokens.w
@@ -55,36 +55,24 @@ mark: thus "get away/off/out" becomes
 @d GRAMMAR_PUNCTUATION_MARKS L".,:;?!(){}[]/" /* note the slash */
 
 =
-cg_token *CGTokens::tokenise(wording W) {
-    wchar_t *as_wide_string = Lexer::word_text(Wordings::first_wn(W));
+wording CGTokens::break(wchar_t *text, int expand) {
     @;
-    wording TW = Feeds::feed_C_string_full(as_wide_string, TRUE,
-        GRAMMAR_PUNCTUATION_MARKS);
+    wording TW = Feeds::feed_C_string_full(text, expand,
+        GRAMMAR_PUNCTUATION_MARKS, TRUE);
     @;
-
-    cg_token *tokens = CGTokens::break_into_tokens(TW);
-    if (tokens == NULL) {
-        StandardProblems::sentence_problem(Task::syntax_tree(),
-            _p_(PM_UnderstandEmptyText),
-            "'understand' should be followed by text which contains at least "
-            "one word or square-bracketed token",
-            "so for instance 'understand \"take [something]\" as taking' is fine, "
-            "but 'understand \"\" as the fog' is not. The same applies to the contents "
-            "of 'topic' columns in tables, since those are also instructions for "
-            "understanding.");
-    }
-    return tokens;
+    @;
+    return TW;
 }
 
 @ =
     int skip = FALSE, literal_punct = FALSE;
-    for (int i=0; as_wide_string[i]; i++) {
-        if (as_wide_string[i] == '[') skip = TRUE;
-        if (as_wide_string[i] == ']') skip = FALSE;
+    for (int i=0; text[i]; i++) {
+        if (text[i] == '[') skip = TRUE;
+        if (text[i] == ']') skip = FALSE;
         if (skip) continue;
-        if ((as_wide_string[i] == '.') || (as_wide_string[i] == ',') ||
-            (as_wide_string[i] == '!') || (as_wide_string[i] == '?') ||
-            (as_wide_string[i] == ':') || (as_wide_string[i] == ';'))
+        if ((text[i] == '.') || (text[i] == ',') ||
+            (text[i] == '!') || (text[i] == '?') ||
+            (text[i] == ':') || (text[i] == ';'))
             literal_punct = TRUE;
     }
     if (literal_punct) {
@@ -93,7 +81,7 @@ cg_token *CGTokens::tokenise(wording W) {
             "or more specifically cannot contain any of these: . , ! ? : ; since they "
            "are already used in various ways by the parser, and would not correctly "
            "match here.");
-        return NULL;
+        return EMPTY_WORDING;
     }
 
 @ =
@@ -112,9 +100,53 @@ cg_token *CGTokens::tokenise(wording W) {
             "brackets, this problem message is also sometimes seen "
             "if empty square brackets are used, as in 'Understand "
             "\"bless []\" as blessing.'");
-        return NULL;
+        return EMPTY_WORDING;
     }
 
+@ =
+    LOOP_THROUGH_WORDING(i, TW)
+        if (Lexer::word(i) == FORWARDSLASH_V)
+            if (((i < Wordings::last_wn(TW)) && (CGTokens::numerical(i+1))) ||
+                ((i > Wordings::first_wn(TW)) && (CGTokens::numerical(i-1)))) {
+                StandardProblems::sentence_problem(Task::syntax_tree(),
+                    _p_(PM_SlashCutsDigits),
+                    "'understand' uses a slash '/' here in a way which cuts off something "
+                    "which contains only digits",
+                    "and this will not do anything good. (Note that a slash in grammar "
+                    "like this means an alternative choice of word.)");
+                return EMPTY_WORDING;
+            }
+
+@ =
+int CGTokens::numerical(int wn) {
+    wchar_t *text = Lexer::word_text(wn);
+    for (int i=0; i<Wide::len(text); i++)
+        if (Characters::isdigit(text[i]) == FALSE)
+            return FALSE;
+    return TRUE;
+}
 =
     ur_being_parsed.cg_result = CG_IS_TOKEN;
-    ur_being_parsed.token_text = Feeds::feed_C_string_full(
-        Lexer::word_text(Wordings::first_wn(ur_being_parsed.reference_text)),
-        TRUE, GRAMMAR_PUNCTUATION_MARKS);
+    ur_being_parsed.token_text = CGTokens::break(
+        Lexer::word_text(Wordings::first_wn(ur_being_parsed.reference_text)), TRUE);
 
 @ =
     ur_being_parsed.reversed_reference = TRUE;
diff --git a/services/words-module/Chapter 3/Feeds.w b/services/words-module/Chapter 3/Feeds.w
index 6ac8bb7e1..84fee3c94 100644
--- a/services/words-module/Chapter 3/Feeds.w
+++ b/services/words-module/Chapter 3/Feeds.w
@@ -28,7 +28,7 @@ Some variations on a theme:
 =
 wording Feeds::feed_C_string(wchar_t *text) {
-    return Feeds::feed_C_string_full(text, FALSE, NULL);
+    return Feeds::feed_C_string_full(text, FALSE, NULL, FALSE);
 }
 
 wording Feeds::feed_text(text_stream *text) {
@@ -36,7 +36,7 @@ wording Feeds::feed_text(text_stream *text) {
 }
 
 wording Feeds::feed_C_string_expanding_strings(wchar_t *text) {
-    return Feeds::feed_C_string_full(text, TRUE, NULL);
+    return Feeds::feed_C_string_full(text, TRUE, NULL, FALSE);
 }
 
 wording Feeds::feed_text_expanding_strings(text_stream *text) {
@@ -52,8 +52,10 @@ wording Feeds::feed_text_punctuated(text_stream *text, wchar_t *pmarks) {
 function, written two ways:
 
 =
-wording Feeds::feed_C_string_full(wchar_t *text, int expand, wchar_t *nonstandard) {
+wording Feeds::feed_C_string_full(wchar_t *text, int expand, wchar_t *nonstandard,
+    int break_at_slashes) {
     @;
+    lexer_break_at_slashes = break_at_slashes;
     for (int i=0; text[i] != 0; i++) {
         int last_cr, cr, next_cr;
         if (i > 0) last_cr = text[i-1]; else last_cr = EOF;
diff --git a/services/words-module/Chapter 3/Lexer.w b/services/words-module/Chapter 3/Lexer.w
index e0171a4e3..419c3e1d5 100644
--- a/services/words-module/Chapter 3/Lexer.w
+++ b/services/words-module/Chapter 3/Lexer.w
@@ -367,6 +367,7 @@ wchar_t *lexer_punctuation_marks = L"";
 int lexer_divide_strings_at_text_substitutions; /* Break up text substitutions in quoted text */
 int lexer_allow_I6_escapes; /* Recognise |(-| and |-)| */
 int lexer_wait_for_dashes; /* Ignore all text until first |----| found */
+int lexer_break_at_slashes;
 
 @h Definition of punctuation.
 As we have seen, the question of whether something is a punctuation mark
@@ -543,6 +544,7 @@ void Lexer::reset_lexer(void) {
     lexer_punctuation_marks = STANDARD_PUNCTUATION_MARKS;
     lexer_divide_strings_at_text_substitutions = FALSE;
     lexer_allow_I6_escapes = TRUE;
+    lexer_break_at_slashes = FALSE;
     /* reset the internal states */
     lxs_most_significant_space_char = '\n'; /* we imagine each lexer feed starting a new line */
@@ -683,8 +685,8 @@ void Lexer::feed_triplet(int last_cr, int cr, int next_cr) {
     if (Lexer::is_punctuation(cr)) space = TRUE;
     if ((space) && (lxs_literal_mode)) space = FALSE;
     if ((space) && (cr != '[') && (cr != ']')) {
-        if ((space) && (next_cr == '/')) space = FALSE;
-        if (space) {
+        if (next_cr == '/') space = FALSE;
+        else {
             int lc = 0, nc = 0;
             if (Characters::isdigit((wchar_t) last_cr)) lc = 1;
             if ((last_cr >= 'a') && (last_cr <= 'z')) lc = 2;
@@ -693,6 +695,7 @@ void Lexer::feed_triplet(int last_cr, int cr, int next_cr) {
             if ((next_cr >= 'a') && (next_cr <= 'z')) nc = 2;
             if ((lc == 1) && (nc == 1)) space = FALSE;
             if ((cr == '.') && (lc > 0) && (nc > 0)) space = FALSE;
+            if ((lexer_break_at_slashes) && (cr == '/')) space = TRUE;
         }
     }
     if (space) {
diff --git a/services/words-module/Chapter 4/Loading Preform.w b/services/words-module/Chapter 4/Loading Preform.w
index e039f1318..9b1329362 100644
--- a/services/words-module/Chapter 4/Loading Preform.w
+++ b/services/words-module/Chapter 4/Loading Preform.w
@@ -510,7 +510,7 @@ So, for example, |AW| might then end up as |onions|, |/|, |shallots|.
         if ((k > 0) && (p[k] == '/')) breakme = TRUE;
     }
 
-    if (breakme) AW = Feeds::feed_C_string_full(p, FALSE, L"/"); /* break only at slashes */
+    if (breakme) AW = Feeds::feed_C_string_full(p, FALSE, L"/", FALSE); /* break only at slashes */
 
 @ Intercept |/a/| to |/z/| and |/aa/| to |/zz/|, which don't make ptokens at
 all, but simply change the production's match number.