mirror of
https://github.com/ganelson/inform.git
synced 2024-07-09 02:24:21 +03:00
744 lines
107 KiB
HTML
744 lines
107 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<html lang="en-GB">
|
|
<head>
|
|
<title>Excerpt Meanings</title>
|
|
<link href="../docs-assets/Breadcrumbs.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
<meta http-equiv="Content-Language" content="en-gb">
|
|
|
|
<link href="../docs-assets/Contents.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Progress.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Navigation.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Fonts.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Base.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<script>
|
|
/**
 * Toggle the "show" class on the element whose id is material_id.
 * Called from the onclick handlers of the "?" usage buttons on this page.
 *
 * @param {string} material_id  id attribute of the popup element to toggle
 */
function togglePopup(material_id) {
  var popup = document.getElementById(material_id);
  // getElementById returns null when no element has this id; do nothing
  // rather than throw a TypeError from inside the click handler.
  if (popup === null) return;
  popup.classList.toggle("show");
}
|
|
</script>
|
|
|
|
<link href="../docs-assets/Popups.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<script>
|
|
MathJax = {
|
|
tex: {
|
|
inlineMath: '$', '$'], ['\\(', '\\)'
|
|
},
|
|
svg: {
|
|
fontCache: 'global'
|
|
}
|
|
};
|
|
</script>
|
|
<script type="text/javascript" id="MathJax-script" async
|
|
src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-svg.js">
|
|
</script>
|
|
|
|
<link href="../docs-assets/Colours.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
|
|
</head>
|
|
<body class="commentary-font">
|
|
<nav role="navigation">
|
|
<h1><a href="../index.html">
|
|
<img src="../docs-assets/Inform.png" alt="Inform" height="72">
|
|
</a></h1>
|
|
<ul><li><a href="../compiler.html">compiler tools</a></li>
|
|
<li><a href="../other.html">other tools</a></li>
|
|
<li><a href="../extensions.html">extensions and kits</a></li>
|
|
<li><a href="../units.html">unit test tools</a></li>
|
|
</ul><h2>Compiler Webs</h2><ul>
|
|
<li><a href="../inbuild/index.html">inbuild</a></li>
|
|
<li><a href="../inform7/index.html">inform7</a></li>
|
|
<li><a href="../inter/index.html">inter</a></li>
|
|
</ul><h2>Inbuild Modules</h2><ul>
|
|
<li><a href="../supervisor-module/index.html">supervisor</a></li>
|
|
</ul><h2>Inform7 Modules</h2><ul>
|
|
<li><a href="../core-module/index.html">core</a></li>
|
|
<li><a href="../assertions-module/index.html">assertions</a></li>
|
|
<li><a href="../values-module/index.html">values</a></li>
|
|
<li><a href="../knowledge-module/index.html">knowledge</a></li>
|
|
<li><a href="../imperative-module/index.html">imperative</a></li>
|
|
<li><a href="../runtime-module/index.html">runtime</a></li>
|
|
<li><a href="../if-module/index.html">if</a></li>
|
|
<li><a href="../multimedia-module/index.html">multimedia</a></li>
|
|
<li><a href="../index-module/index.html">index</a></li>
|
|
</ul><h2>Inter Modules</h2><ul>
|
|
<li><a href="../bytecode-module/index.html">bytecode</a></li>
|
|
<li><a href="../building-module/index.html">building</a></li>
|
|
<li><a href="../codegen-module/index.html">codegen</a></li>
|
|
</ul><h2>Services</h2><ul>
|
|
<li><a href="../arch-module/index.html">arch</a></li>
|
|
<li><a href="../calculus-module/index.html">calculus</a></li>
|
|
<li><a href="../html-module/index.html">html</a></li>
|
|
<li><a href="../inflections-module/index.html">inflections</a></li>
|
|
<li><a href="../kinds-module/index.html">kinds</a></li>
|
|
<li><a href="../linguistics-module/index.html">linguistics</a></li>
|
|
<li><a href="../problems-module/index.html">problems</a></li>
|
|
<li><a href="../syntax-module/index.html">syntax</a></li>
|
|
<li><a href="../words-module/index.html">words</a></li>
|
|
<li><a href="../../../inweb/docs/foundation-module/index.html">foundation</a></li>
|
|
|
|
</ul>
|
|
</nav>
|
|
<main role="main">
|
|
<!--Weave of 'Excerpt Meanings' generated by Inweb-->
|
|
<div class="breadcrumbs">
|
|
<ul class="crumbs"><li><a href="../index.html">Home</a></li><li><a href="../compiler.html">Services</a></li><li><a href="index.html">lexicon</a></li><li><a href="index.html#2">Chapter 2: Excerpts</a></li><li><b>Excerpt Meanings</b></li></ul></div>
|
|
<p class="purpose">To register and deregister meanings for excerpts of text as nouns, adjectives, imperative phrases and other usages.</p>
|
|
|
|
<ul class="toc"><li><a href="2-em.html#SP1">§1. Excerpt meanings</a></li><li><a href="2-em.html#SP2">§2. Meaning codes</a></li><li><a href="2-em.html#SP3">§3. Annotating words</a></li><li><a href="2-em.html#SP5">§5. Creating EMs</a></li><li><a href="2-em.html#SP6">§6. Debugging log</a></li><li><a href="2-em.html#SP7">§7. Hashing excerpts</a></li><li><a href="2-em.html#SP10">§10. EM Listing</a></li><li><a href="2-em.html#SP12">§12. Registration</a></li><li><a href="2-em.html#SP13">§13. Errors</a></li></ul><hr class="tocbar">
|
|
|
|
<p class="commentary firstcommentary"><a id="SP1" class="paragraph-anchor"></a><b>§1. Excerpt meanings. </b>We now define the <a href="2-em.html#SP1" class="internal">excerpt_meaning</a> data structure, which holds a single
|
|
entry in what amounts to a dictionary. The text to be matched is specified
|
|
as a sequence of at least one, and at most 32, tokens: these can either be
|
|
pointers to specific vocabulary, or can be null, which implies that
|
|
arbitrary non-empty text can appear in the given position. It is forbidden
|
|
for the token list to contain two nulls in a row.
|
|
</p>
|
|
|
|
<p class="commentary">For instance, the token list:
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> drink # milk #</span>
|
|
</pre>
|
|
<p class="commentary">matches "drink more milk today and every day", but not "drink milk". The
|
|
sharp symbol <span class="extract"><span class="extract-syntax">#</span></span> is printed in place of a null token, both here and in the
|
|
debugging log.
|
|
</p>
|
|
|
|
<p class="commentary">Each excerpt meaning also comes with a hash code, which is automatically
|
|
generated from its token list, and a pointer to some structure.
|
|
</p>
|
|
|
|
<pre class="definitions code-font"><span class="definition-keyword">define</span> <span class="constant-syntax">MAX_TOKENS_PER_EXCERPT_MEANING</span><span class="plain-syntax"> </span><span class="constant-syntax">32</span>
|
|
</pre>
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">typedef</span><span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">unsigned</span><span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">; </span><span class="comment-syntax"> what kind of meaning: a single MC, not a bitmap</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">general_pointer</span><span class="plain-syntax"> </span><span class="identifier-syntax">data</span><span class="plain-syntax">; </span><span class="comment-syntax"> data structure being referred to</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">no_em_tokens</span><span class="plain-syntax">; </span><span class="comment-syntax"> length of token list</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">vocabulary_entry</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="constant-syntax">MAX_TOKENS_PER_EXCERPT_MEANING</span><span class="plain-syntax">]; </span><span class="comment-syntax"> token list</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">excerpt_hash</span><span class="plain-syntax">; </span><span class="comment-syntax"> hash code generated from the token list</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">CLASS_DEFINITION</span>
|
|
<span class="plain-syntax">} </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax">;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>The structure excerpt_meaning is accessed in 1/lxc, 2/pe and here.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP2" class="paragraph-anchor"></a><b>§2. Meaning codes. </b>These assign a context to a meaning, and so decide how the <span class="extract"><span class="extract-syntax">data</span></span> pointer for
|
|
an excerpt meaning is to be interpreted. For instance, "Persian carpet" might
|
|
have a meaning with code <span class="extract"><span class="extract-syntax">NOUN_MC</span></span>.
|
|
</p>
|
|
|
|
<p class="commentary">Meaning codes are used in other contexts in Inform besides this one. There
|
|
are up to 31 of them and each is a distinct power of two; there is no
|
|
significance to their ordering. Integers are assumed at least 32 bits wide and
|
|
can therefore hold a bitmap representing any subset of these meaning codes;
|
|
using only 31 bits avoids any potential nuisance over the sign bit.
|
|
</p>
|
|
|
|
<p class="commentary">For instance, <span class="extract"><span class="extract-syntax">PROPERTY_MC + TABLE_MC</span></span> might mean "either a property name or
|
|
a table name". But the <span class="extract"><span class="extract-syntax">meaning_code</span></span> field of an <a href="2-em.html#SP1" class="internal">excerpt_meaning</a> is always
|
|
a pure power of 2, i.e., a single bit.
|
|
</p>
|
|
|
|
<pre class="definitions code-font"><span class="definition-keyword">define</span> <span class="constant-syntax">MISCELLANEOUS_MC</span><span class="plain-syntax"> </span><span class="constant-syntax">0x00000001</span><span class="plain-syntax"> </span><span class="comment-syntax"> a grab-bag of other possible nouns</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">NOUN_MC</span><span class="plain-syntax"> </span><span class="constant-syntax">0x00000002</span><span class="plain-syntax"> </span><span class="comment-syntax"> e.g., </span><span class="extract"><span class="extract-syntax">upright chair</span></span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">ADJECTIVE_MC</span><span class="plain-syntax"> </span><span class="constant-syntax">0x00000004</span><span class="plain-syntax"> </span><span class="comment-syntax"> e.g., </span><span class="extract"><span class="extract-syntax">invisible</span></span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP3" class="paragraph-anchor"></a><b>§3. Annotating words. </b>Each word in the vocabulary collected up by <a href="../words-module/index.html" class="internal">words</a> will be annotated with
|
|
an object of the following class:
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">typedef</span><span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="reserved-syntax">vocabulary_lexicon_data</span><span class="plain-syntax"> {</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">KINDS_MODULE</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">kind</span><span class="plain-syntax"> *</span><span class="identifier-syntax">one_word_kind</span><span class="plain-syntax">; </span><span class="comment-syntax"> ditto as a kind with single-word name</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">start_list</span><span class="plain-syntax">; </span><span class="comment-syntax"> meanings starting with this</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">end_list</span><span class="plain-syntax">; </span><span class="comment-syntax"> meanings ending with this</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">middle_list</span><span class="plain-syntax">; </span><span class="comment-syntax"> meanings with this inside but at neither end</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">subset_list</span><span class="plain-syntax">; </span><span class="comment-syntax"> meanings allowing subsets which include this</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">subset_list_length</span><span class="plain-syntax">; </span><span class="comment-syntax"> number of meanings in the subset list</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">scanned_already</span><span class="plain-syntax">; </span><span class="comment-syntax"> used only for diagnostics</span>
|
|
<span class="plain-syntax">} </span><span class="reserved-syntax">vocabulary_lexicon_data</span><span class="plain-syntax">;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>The structure vocabulary_lexicon_data is accessed in 2/pe and here.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP4" class="paragraph-anchor"></a><b>§4. </b>With the following initialiser:
|
|
</p>
|
|
|
|
<pre class="definitions code-font"><span class="definition-keyword">define</span> <span class="constant-syntax">VOCABULARY_MEANING_INITIALISER_WORDS_CALLBACK</span><span class="plain-syntax"> </span><a href="2-em.html#SP4" class="function-link"><span class="function-syntax">ExcerptMeanings::new_vocabulary_attachment</span></a>
|
|
</pre>
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">vocabulary_lexicon_data</span><span class="plain-syntax"> </span><span class="function-syntax">ExcerptMeanings::new_vocabulary_attachment</span><span class="plain-syntax">(</span><span class="identifier-syntax">vocabulary_entry</span><span class="plain-syntax"> *</span><span class="identifier-syntax">ve</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">KINDS_MODULE</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Kinds::Textual::parse_variable</span><span class="plain-syntax">(</span><span class="identifier-syntax">ve</span><span class="plain-syntax">)) </span><span class="identifier-syntax">ve</span><span class="plain-syntax">-></span><span class="identifier-syntax">flags</span><span class="plain-syntax"> |= </span><span class="identifier-syntax">KIND_FAST_MC</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">ve</span><span class="plain-syntax">-></span><span class="identifier-syntax">flags</span><span class="plain-syntax">) & </span><span class="identifier-syntax">NUMBER_MC</span><span class="plain-syntax">) </span><span class="identifier-syntax">Cardinals::mark_as_cardinal</span><span class="plain-syntax">(</span><span class="identifier-syntax">ve</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">ve</span><span class="plain-syntax">-></span><span class="identifier-syntax">flags</span><span class="plain-syntax">) & </span><span class="identifier-syntax">ORDINAL_MC</span><span class="plain-syntax">) </span><span class="identifier-syntax">Cardinals::mark_as_ordinal</span><span class="plain-syntax">(</span><span class="identifier-syntax">ve</span><span class="plain-syntax">);</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">vocabulary_lexicon_data</span><span class="plain-syntax"> </span><span class="identifier-syntax">ld</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">ld</span><span class="plain-syntax">.</span><span class="element-syntax">start_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">; </span><span class="identifier-syntax">ld</span><span class="plain-syntax">.</span><span class="element-syntax">end_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">; </span><span class="identifier-syntax">ld</span><span class="plain-syntax">.</span><span class="element-syntax">middle_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">ld</span><span class="plain-syntax">.</span><span class="element-syntax">subset_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">; </span><span class="identifier-syntax">ld</span><span class="plain-syntax">.</span><span class="element-syntax">subset_list_length</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">ld</span><span class="plain-syntax">.</span><span class="element-syntax">scanned_already</span><span class="plain-syntax"> = </span><span class="identifier-syntax">FALSE</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">KINDS_MODULE</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">ld</span><span class="plain-syntax">.</span><span class="element-syntax">one_word_kind</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">ld</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP5" class="paragraph-anchor"></a><b>§5. Creating EMs. </b>The following makes a skeletal EM structure, with no token list or hash code
|
|
as yet.
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="function-syntax">ExcerptMeanings::new</span><button class="popup" onclick="togglePopup('usagePopup1')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup1">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::new</span></span>:<br/><a href="2-em.html#SP12">§12</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">unsigned</span><span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">mc</span><span class="plain-syntax">, </span><span class="identifier-syntax">general_pointer</span><span class="plain-syntax"> </span><span class="identifier-syntax">data</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em</span><span class="plain-syntax"> = </span><span class="identifier-syntax">CREATE</span><span class="plain-syntax">(</span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">meaning_code</span><span class="plain-syntax"> = </span><span class="identifier-syntax">mc</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">data</span><span class="plain-syntax"> = </span><span class="identifier-syntax">data</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">excerpt_hash</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP6" class="paragraph-anchor"></a><b>§6. Debugging log. </b>First to log a general bitmap made up from meaning codes:
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">void</span><span class="plain-syntax"> </span><span class="function-syntax">ExcerptMeanings::log</span><button class="popup" onclick="togglePopup('usagePopup2')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup2">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::log</span></span>:<br/>Lexicon Module - <a href="1-lm.html#SP3">§3</a>, <a href="1-lm.html#SP4">§4</a></span></button><span class="plain-syntax">(</span><span class="identifier-syntax">OUTPUT_STREAM</span><span class="plain-syntax">, </span><span class="reserved-syntax">void</span><span class="plain-syntax"> *</span><span class="identifier-syntax">vem</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em</span><span class="plain-syntax"> = (</span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *) </span><span class="identifier-syntax">vem</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">em</span><span class="plain-syntax"> == </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">) { </span><span class="identifier-syntax">WRITE</span><span class="plain-syntax">(</span><span class="string-syntax">"&lt;null-em&gt;"</span><span class="plain-syntax">); </span><span class="reserved-syntax">return</span><span class="plain-syntax">; }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">WRITE</span><span class="plain-syntax">(</span><span class="string-syntax">"{"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">i</span><span class="function-syntax">&lt;em-&gt;</span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax">>0) </span><span class="identifier-syntax">WRITE</span><span class="plain-syntax">(</span><span class="string-syntax">" "</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">] == </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">) { </span><span class="identifier-syntax">WRITE</span><span class="plain-syntax">(</span><span class="string-syntax">"#"</span><span class="plain-syntax">); </span><span class="reserved-syntax">continue</span><span class="plain-syntax">; }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">WRITE</span><span class="plain-syntax">(</span><span class="string-syntax">"%V"</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">]);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">WRITE</span><span class="plain-syntax">(</span><span class="string-syntax">" = "</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">NodeType::log</span><span class="plain-syntax">(</span><span class="identifier-syntax">OUT</span><span class="plain-syntax">, (</span><span class="reserved-syntax">int</span><span class="plain-syntax">) </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">WRITE</span><span class="plain-syntax">(</span><span class="string-syntax">"}"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax">}</span>
|
|
|
|
<span class="reserved-syntax">void</span><span class="plain-syntax"> </span><span class="function-syntax">ExcerptMeanings::log_all</span><button class="popup" onclick="togglePopup('usagePopup3')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup3">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::log_all</span></span>:<br/>Parse Excerpts - <a href="2-pe.html#SP8">§8</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">void</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOOP_OVER</span><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">, </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOG</span><span class="plain-syntax">(</span><span class="string-syntax">"%02d: %08x $M\n"</span><span class="plain-syntax">, </span><span class="identifier-syntax">i</span><span class="plain-syntax">++, (</span><span class="identifier-syntax">pointer_sized_int</span><span class="plain-syntax">) </span><span class="identifier-syntax">em</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP7" class="paragraph-anchor"></a><b>§7. Hashing excerpts. </b>For excerpts <span class="extract"><span class="extract-syntax">(w1, w2)</span></span>, we need a form of hash function which makes it
|
|
easy to test whether the words in one excerpt can all be found in another,
|
|
or to be more exact whether
|
|
</p>
|
|
|
|
<p class="commentary">\[ \lbrace I_j\mid w_1\leq j\leq w_2\rbrace \subseteq
\lbrace I_j\mid w_3\leq j\leq w_4\rbrace \]
|
|
</p>
|
|
|
|
<p class="commentary">where \(I_n\) is the identity of word \(n\). As with all hash algorithms, we do
|
|
not need to guarantee a positive match, only a negative, so we can throw
|
|
away a lot of information. And we also want a hash function which makes it
|
|
easy to test whether an excerpt contains any of the literals.
|
|
</p>
|
|
|
|
<p class="commentary firstcommentary"><a id="SP8" class="paragraph-anchor"></a><b>§8. </b>There are two sources of text which we might want to hash in this way:
|
|
first, actual excerpts found in the source text. These are not very
|
|
expensive to calculate, but every ounce of speed helps here, so we cache
|
|
the most recent.
|
|
</p>
|
|
|
|
<p class="commentary">The hash generated this way is an arbitrary bitmap of bits 1 to 30, with
|
|
bits 31 and 32 left clear.
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">cached_hash_w1</span><span class="plain-syntax"> = -2, </span><span class="identifier-syntax">cached_hash_w2</span><span class="plain-syntax"> = -2, </span><span class="identifier-syntax">cached_value</span><span class="plain-syntax">;</span>
|
|
|
|
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="function-syntax">ExcerptMeanings::hash_code</span><button class="popup" onclick="togglePopup('usagePopup4')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup4">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::hash_code</span></span>:<br/>Lexicon - <a href="1-lxc.html#SP4">§4</a><br/>Parse Excerpts - <a href="2-pe.html#SP5">§5</a></span></button><span class="plain-syntax">(</span><span class="identifier-syntax">wording</span><span class="plain-syntax"> </span><span class="identifier-syntax">W</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Wordings::empty</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">)) </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">w1</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">), </span><span class="identifier-syntax">w2</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Wordings::last_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">h</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">; </span><span class="identifier-syntax">vocabulary_entry</span><span class="plain-syntax"> *</span><span class="identifier-syntax">v</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">w1</span><span class="plain-syntax"> == </span><span class="identifier-syntax">cached_hash_w1</span><span class="plain-syntax">) && (</span><span class="identifier-syntax">w2</span><span class="plain-syntax"> == </span><span class="identifier-syntax">cached_hash_w2</span><span class="plain-syntax">)) </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">cached_value</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax">=</span><span class="identifier-syntax">w1</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax"><=</span><span class="identifier-syntax">w2</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">v</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Lexer::word</span><span class="plain-syntax">(</span><span class="identifier-syntax">i</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">v</span><span class="plain-syntax">) </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP8_2" class="named-paragraph-link"><span class="named-paragraph">Allow this vocabulary entry to contribute to the excerpt's hash code</span><span class="named-paragraph-number">8.2</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">h</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP8_1" class="paragraph-anchor"></a><b>§8.1. </b>Second, when a new excerpt meaning is to be registered, we want to hash
|
|
code its token list. But only some of the tokens are vocabulary entries,
|
|
while others instead represent gaps where arbitrary text can appear (referred
|
|
to with a null pointer). Note that we simply ignore the gaps when hashing,
|
|
that is, we produce the same hash as we would if the gaps were not there at
|
|
all.
|
|
</p>
|
|
|
|
<p class="commentary">The hash generated this way is an arbitrary bitmap of bits 1 to 31, with
|
|
bit 32 left clear. Bit 31 is set, as a special case, for excerpts in the
|
|
context of text substitutions which begin with a word known to exist, and
|
|
with differing meanings, in two differently cased forms: this is how "[the
|
|
noun]" is distinguished from "[The noun]". (The lower 30 bits have the
|
|
same meaning as in the first case above.)
|
|
</p>
|
|
|
|
<pre class="definitions code-font"><span class="definition-keyword">define</span> <span class="constant-syntax">CAPITALISED_VARIANT_FORM</span><span class="plain-syntax"> (1 << </span><span class="constant-syntax">30</span><span class="plain-syntax">)</span>
|
|
</pre>
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">void</span><span class="plain-syntax"> </span><span class="function-syntax">ExcerptMeanings::hash_code_from_token_list</span><button class="popup" onclick="togglePopup('usagePopup5')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup5">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::hash_code_from_token_list</span></span>:<br/><a href="2-em.html#SP10_1">§10.1</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">h</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax"> == </span><span class="constant-syntax">0</span><span class="plain-syntax">) </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"Empty text when registering"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax"> >= </span><span class="constant-syntax">1</span><span class="plain-syntax">) && (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[0])) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">vocabulary_entry</span><span class="plain-syntax"> *</span><span class="identifier-syntax">lcf</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Vocabulary::get_lower_case_form</span><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[0]);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">lcf</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">h</span><span class="plain-syntax"> = </span><span class="identifier-syntax">h</span><span class="plain-syntax"> | </span><span class="constant-syntax">CAPITALISED_VARIANT_FORM</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[0] = </span><span class="identifier-syntax">lcf</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">i</span><span class="function-syntax">&lt;em-&gt;</span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">vocabulary_entry</span><span class="plain-syntax"> *</span><span class="identifier-syntax">v</span><span class="plain-syntax"> = </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">];</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">v</span><span class="plain-syntax">) </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP8_2" class="named-paragraph-link"><span class="named-paragraph">Allow this vocabulary entry to contribute to the excerpt's hash code</span><span class="named-paragraph-number">8.2</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">excerpt_hash</span><span class="plain-syntax"> = </span><span class="identifier-syntax">h</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP8_2" class="paragraph-anchor"></a><b>§8.2. </b>Now each vocabulary entry <span class="extract"><span class="extract-syntax">v</span></span>, i.e., each distinct word identity, itself has
|
|
a hash code to identify it. These are stored in <span class="extract"><span class="extract-syntax">v->hash</span></span> and, except for
|
|
literals, are more or less evenly distributed in about the range 0 to 1000.
|
|
</p>
|
|
|
|
<p class="commentary">The contribution made by a single word's individual hash to the bitmap hash
|
|
for the whole excerpt is as follows.
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Allow this vocabulary entry to contribute to the excerpt's hash code</span><span class="named-paragraph-number">8.2</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">v</span><span class="plain-syntax">-></span><span class="identifier-syntax">flags</span><span class="plain-syntax">) & </span><span class="identifier-syntax">NUMBER_MC</span><span class="plain-syntax">) </span><span class="identifier-syntax">h</span><span class="plain-syntax"> = </span><span class="identifier-syntax">h</span><span class="plain-syntax"> | </span><span class="constant-syntax">1</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">v</span><span class="plain-syntax">-></span><span class="identifier-syntax">flags</span><span class="plain-syntax">) & </span><span class="identifier-syntax">TEXT_MC</span><span class="plain-syntax">) </span><span class="identifier-syntax">h</span><span class="plain-syntax"> = </span><span class="identifier-syntax">h</span><span class="plain-syntax"> | </span><span class="constant-syntax">2</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">v</span><span class="plain-syntax">-></span><span class="identifier-syntax">flags</span><span class="plain-syntax">) & </span><span class="identifier-syntax">I6_MC</span><span class="plain-syntax">) </span><span class="identifier-syntax">h</span><span class="plain-syntax"> = </span><span class="identifier-syntax">h</span><span class="plain-syntax"> | </span><span class="constant-syntax">4</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="identifier-syntax">h</span><span class="plain-syntax"> = </span><span class="identifier-syntax">h</span><span class="plain-syntax"> | (8 << ((</span><span class="identifier-syntax">v</span><span class="plain-syntax">-></span><span class="identifier-syntax">hash</span><span class="plain-syntax">) % </span><span class="constant-syntax">27</span><span class="plain-syntax">));</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP8">§8</a>, <a href="2-em.html#SP8_1">§8.1</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP9" class="paragraph-anchor"></a><b>§9. </b>To sum up: the excerpt hash is a bitmap indicating what categories of
|
|
words are present in the excerpt. It ignores "gaps" in token lists, and
|
|
it ignores the order of the words and repetitions. The three least
|
|
significant bits indicate whether numbers, text or I6 verbatims are
|
|
present, and the next 27 bits indicate the presence of other words: e.g.,
|
|
bit 4 indicates that a word with hash code 0, 27, 54, ..., is present, and
|
|
so on. Bit 31, which is used only for token lists of excerpt meanings,
|
|
marks that an excerpt is a variant form whose first word must be
|
|
capitalised in order for it to match. Bit 32 is always left blank (for
|
|
superstitious reasons to do with the sign bit and differences between
|
|
platforms in handling signed bit shifts).
|
|
</p>
|
|
|
|
<p class="commentary">The result is not a tremendously good hashing number, since it generally
|
|
produces a sparse bitmap, so that the variety is not as great as might be
|
|
thought. But it is optimised for the trickiest parsing cases where the
|
|
rewards of saving unnecessary tests are greatest.
|
|
</p>
|
|
|
|
<p class="commentary firstcommentary"><a id="SP10" class="paragraph-anchor"></a><b>§10. EM Listing. </b>We are clearly not going to store the excerpt meanings in a hash table
|
|
keyed by the hash values of excerpts — with hash values as large as
|
|
\(2^{31}-1\), that would be practically impossible.
|
|
</p>
|
|
|
|
<p class="commentary">Instead we key using the actual words. Each vocabulary entry has four
|
|
linked lists of EMs: its subset list, its start list, its middle list,
|
|
and its end list.
|
|
</p>
|
|
|
|
<ul class="items"><li>(a) If an EM needs to allow parsing as a subset, it must be placed in the
|
|
subset list of every word. For instance, "buttress against cathedral
|
|
wall" registered under the code <span class="extract"><span class="extract-syntax">NOUN_MC</span></span> would be listed
|
|
in the subset lists of "buttress", "against", "cathedral" and "wall".
|
|
</li><li>(b) Otherwise it is placed in only one list:
|
|
<ul class="items"><li>(b1) If the token list consists only of a single gap <span class="extract"><span class="extract-syntax">#</span></span>, we must be
|
|
registering a "say" phrase to say a value. (There is one of these for
|
|
each kind of value.) This meaning is listed under a special <span class="extract"><span class="extract-syntax">blank_says_p</span></span>
|
|
list, which is not attached to any vocabulary entry.
|
|
</li><li>(b2) Otherwise, if the first token is not a <span class="extract"><span class="extract-syntax">#</span></span> gap, it goes into the
|
|
start list for the first token's word: for instance, <span class="extract"><span class="extract-syntax">award # points</span></span> joins
|
|
the start list for "award".
|
|
</li><li>(b3) Otherwise, if the last token is not a <span class="extract"><span class="extract-syntax">#</span></span> gap, it goes into the end
|
|
list for the last token's word: for instance, <span class="extract"><span class="extract-syntax"># in # from now</span></span> joins the
|
|
end list for "now".
|
|
</li><li>(b4) Otherwise, it goes into the middle list of the word for the leftmost
|
|
token which is not a <span class="extract"><span class="extract-syntax">#</span></span>: for instance, <span class="extract"><span class="extract-syntax"># plus #</span></span> joins the middle list for
|
|
"plus".
|
|
</li></ul>
|
|
</li></ul>
|
|
<p class="commentary">Since token lists containing two or more consecutive <span class="extract"><span class="extract-syntax">#</span></span>s cannot exist, this exhausts the possibilities.
|
|
</p>
|
|
|
|
<p class="commentary">Outside of subset mode, we will then test a given excerpt <span class="extract"><span class="extract-syntax">(w1, w2)</span></span> in the
|
|
source text against all possible meanings by checking the start list for <span class="extract"><span class="extract-syntax">w1</span></span>,
|
|
the end list for <span class="extract"><span class="extract-syntax">w2</span></span> and the middle list for every one of <span class="extract"><span class="extract-syntax">(w1+1, w2-1)</span></span>.
|
|
Because of this:
|
|
</p>
|
|
|
|
<ul class="items"><li>(i) Performance suffers if lists for individual words become unbalanced
|
|
in size. This is why we register Unicode translations as "white chess
|
|
knight" rather than "Unicode white chess knight", and so on; the
|
|
alternative would be a stupendously long start list for "unicode".
|
|
</li><li>(ii) Middle lists are tested far more often than start or end lists, so
|
|
we should keep them as small as possible. This is why (b4) above is our last
|
|
resort; happily phrases both starting and ending with <span class="extract"><span class="extract-syntax">#</span></span> are uncommon.
|
|
</li></ul>
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">blank_says_p</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">;</span>
|
|
<span class="reserved-syntax">void</span><span class="plain-syntax"> </span><span class="function-syntax">ExcerptMeanings::register_em</span><button class="popup" onclick="togglePopup('usagePopup6')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup6">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::register_em</span></span>:<br/><a href="2-em.html#SP12">§12</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">unsigned</span><span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">, </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">CORE_MODULE</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">PreformCache::warn_of_changes</span><span class="plain-syntax">(); </span><span class="comment-syntax"> the existence of new meanings jeopardises any cached parsing results</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP10_1" class="named-paragraph-link"><span class="named-paragraph">Compute the new excerpt's hash code from its token list</span><span class="named-paragraph-number">10.1</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP10_2" class="named-paragraph-link"><span class="named-paragraph">Watermark each word in the token list with the meaning code being applied</span><span class="named-paragraph-number">10.2</span></a></span><span class="plain-syntax">;</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOGIF</span><span class="plain-syntax">(</span><span class="identifier-syntax">EXCERPT_MEANINGS</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> </span><span class="string-syntax">"Logging meaning: $M with hash %08x, mc=%d, %d tokens\n"</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">excerpt_hash</span><span class="plain-syntax">, </span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">);</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax"> & </span><span class="constant-syntax">SUBSET_PARSING_BITMAP</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP10_3" class="named-paragraph-link"><span class="named-paragraph">Place the new meaning under the subset list for each non-article word</span><span class="named-paragraph-number">10.3</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">EM_ALLOW_BLANK_TEST_LEXICON_CALLBACK</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax"> == </span><span class="constant-syntax">1</span><span class="plain-syntax">) && (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[0] == </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">) &&</span>
|
|
<span class="plain-syntax"> (</span><span class="identifier-syntax">EM_ALLOW_BLANK_TEST_LEXICON_CALLBACK</span><span class="plain-syntax">(</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">))) {</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP10_4" class="named-paragraph-link"><span class="named-paragraph">Place the new meaning under the say-blank list</span><span class="named-paragraph-number">10.4</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[0]) {</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP10_5" class="named-paragraph-link"><span class="named-paragraph">Place the new meaning under the start list of the first word</span><span class="named-paragraph-number">10.5</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> } </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="identifier-syntax">no_em_tokens</span><span class="plain-syntax">-1]) {</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP10_6" class="named-paragraph-link"><span class="named-paragraph">Place the new meaning under the end list of the last word</span><span class="named-paragraph-number">10.6</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> } </span><span class="reserved-syntax">else</span><span class="plain-syntax"> {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax">=1; </span><span class="identifier-syntax">i</span><span class="function-syntax">&lt;em-&gt;</span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">-1; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++)</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">]) { </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP10_7" class="named-paragraph-link"><span class="named-paragraph">Place the new meaning under the middle list of word i</span><span class="named-paragraph-number">10.7</span></a></span><span class="plain-syntax">; </span><span class="reserved-syntax">break</span><span class="plain-syntax">; }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax"> >= </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">-1) </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"registered meaning of two or more #s"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP10_1" class="paragraph-anchor"></a><b>§10.1. </b>See above.
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Compute the new excerpt's hash code from its token list</span><span class="named-paragraph-number">10.1</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><a href="2-em.html#SP8_1" class="function-link"><span class="function-syntax">ExcerptMeanings::hash_code_from_token_list</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP10">§10</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP10_2" class="paragraph-anchor"></a><b>§10.2. </b>Another important optimisation is to flag each word in the meaning with
|
|
the given meaning code — this is why vocabulary flags and excerpt meaning
|
|
codes share the same numbering space. If we register "Table of Surgical
|
|
Instruments" as a table name, the word "surgical", for instance, picks
|
|
up the <span class="extract"><span class="extract-syntax">TABLE_MC</span></span> bit in its <span class="extract"><span class="extract-syntax">flags</span></span> bitmap.
|
|
</p>
|
|
|
|
<p class="commentary">The advantage of this is that if we want to see whether <span class="extract"><span class="extract-syntax">(w1, w2)</span></span> might be
|
|
a table name, we can take a bitwise AND of the flags for each word in
|
|
the range; if the result doesn't have the <span class="extract"><span class="extract-syntax">TABLE_MC</span></span> bit set, then at least
|
|
one of the words never occurs in a table name, so the answer must be
|
|
"no". This produces rapid, definite negatives with only a few false
|
|
positives.
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Watermark each word in the token list with the meaning code being applied</span><span class="named-paragraph-number">10.2</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">i</span><span class="function-syntax">&lt;em-&gt;</span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++)</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">])</span>
|
|
<span class="plain-syntax"> ((</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">])-></span><span class="identifier-syntax">flags</span><span class="plain-syntax">) |= </span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP10">§10</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP10_3" class="paragraph-anchor"></a><b>§10.3. </b>Note that articles (a, an, the, some) are excluded: this means we don't
|
|
waste time trying to see if the excerpt "the" might be a reference to the
|
|
object "Gregory the Great".
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Place the new meaning under the subset list for each non-article word</span><span class="named-paragraph-number">10.3</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">i</span><span class="plain-syntax">&lt;</span><span class="identifier-syntax">em</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">vocabulary_entry</span><span class="plain-syntax"> *</span><span class="identifier-syntax">v</span><span class="plain-syntax"> = </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">];</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">v</span><span class="plain-syntax"> == </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOG</span><span class="plain-syntax">(</span><span class="string-syntax">"Logging meaning: $M with hash %08x\n"</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">excerpt_hash</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"# in registration of subset meaning"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">NTI::test_vocabulary</span><span class="plain-syntax">(</span><span class="identifier-syntax">v</span><span class="plain-syntax">, </span><span class="function-syntax">&lt;article&gt;</span><span class="plain-syntax">)) </span><span class="reserved-syntax">continue</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">p</span><span class="plain-syntax"> = </span><a href="2-em.html#SP11" class="function-link"><span class="function-syntax">ExcerptMeanings::new_em_pnode</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">p</span><span class="plain-syntax">-></span><span class="identifier-syntax">next_alternative</span><span class="plain-syntax"> = </span><span class="identifier-syntax">v</span><span class="plain-syntax">-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">subset_list</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">v</span><span class="plain-syntax">-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">subset_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">p</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">v</span><span class="plain-syntax">-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">subset_list_length</span><span class="plain-syntax">++;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP10">§10</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP10_4" class="paragraph-anchor"></a><b>§10.4. </b>Here we register <span class="extract"><span class="extract-syntax">#</span></span>, which is what "To say (N - a number)" and similar
|
|
constructions translate to.
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Place the new meaning under the say-blank list</span><span class="named-paragraph-number">10.4</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">p</span><span class="plain-syntax"> = </span><a href="2-em.html#SP11" class="function-link"><span class="function-syntax">ExcerptMeanings::new_em_pnode</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">blank_says_p</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">p2</span><span class="plain-syntax"> = </span><span class="identifier-syntax">blank_says_p</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">while</span><span class="plain-syntax"> (</span><span class="identifier-syntax">p2</span><span class="plain-syntax">-></span><span class="identifier-syntax">next_alternative</span><span class="plain-syntax">) </span><span class="identifier-syntax">p2</span><span class="plain-syntax"> = </span><span class="identifier-syntax">p2</span><span class="plain-syntax">-></span><span class="identifier-syntax">next_alternative</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">p2</span><span class="plain-syntax">-></span><span class="identifier-syntax">next_alternative</span><span class="plain-syntax"> = </span><span class="identifier-syntax">p</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="identifier-syntax">blank_says_p</span><span class="plain-syntax"> = </span><span class="identifier-syntax">p</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOGIF</span><span class="plain-syntax">(</span><span class="identifier-syntax">EXCERPT_MEANINGS</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> </span><span class="string-syntax">"The blank list with $M is now:\n$T"</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">, </span><span class="identifier-syntax">blank_says_p</span><span class="plain-syntax">);</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP10">§10</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP10_5" class="paragraph-anchor"></a><b>§10.5. </b><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Place the new meaning under the start list of the first word</span><span class="named-paragraph-number">10.5</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">p</span><span class="plain-syntax"> = </span><a href="2-em.html#SP11" class="function-link"><span class="function-syntax">ExcerptMeanings::new_em_pnode</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">p</span><span class="plain-syntax">-></span><span class="identifier-syntax">next_alternative</span><span class="plain-syntax"> = </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[0]-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">start_list</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[0]-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">start_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">p</span><span class="plain-syntax">;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP10">§10</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP10_6" class="paragraph-anchor"></a><b>§10.6. </b>...and similarly...
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Place the new meaning under the end list of the last word</span><span class="named-paragraph-number">10.6</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">p</span><span class="plain-syntax"> = </span><a href="2-em.html#SP11" class="function-link"><span class="function-syntax">ExcerptMeanings::new_em_pnode</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">p</span><span class="plain-syntax">-></span><span class="identifier-syntax">next_alternative</span><span class="plain-syntax"> = </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">-1]-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">end_list</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax">-1]-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">end_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">p</span><span class="plain-syntax">;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP10">§10</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP10_7" class="paragraph-anchor"></a><b>§10.7. </b>...and similarly again:
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Place the new meaning under the middle list of word i</span><span class="named-paragraph-number">10.7</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">p</span><span class="plain-syntax"> = </span><a href="2-em.html#SP11" class="function-link"><span class="function-syntax">ExcerptMeanings::new_em_pnode</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">p</span><span class="plain-syntax">-></span><span class="identifier-syntax">next_alternative</span><span class="plain-syntax"> = </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">]-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">middle_list</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">]-></span><span class="identifier-syntax">means</span><span class="plain-syntax">.</span><span class="element-syntax">middle_list</span><span class="plain-syntax"> = </span><span class="identifier-syntax">p</span><span class="plain-syntax">;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP10">§10</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP11" class="paragraph-anchor"></a><b>§11. </b>Parse nodes are only created from excerpt meanings for storage inside the
|
|
excerpt parser, so these never live on into trees.
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="function-syntax">ExcerptMeanings::new_em_pnode</span><button class="popup" onclick="togglePopup('usagePopup7')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup7">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::new_em_pnode</span></span>:<br/><a href="2-em.html#SP10_3">§10.3</a>, <a href="2-em.html#SP10_4">§10.4</a>, <a href="2-em.html#SP10_5">§10.5</a>, <a href="2-em.html#SP10_6">§10.6</a>, <a href="2-em.html#SP10_7">§10.7</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">parse_node</span><span class="plain-syntax"> *</span><span class="identifier-syntax">pn</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Node::new</span><span class="plain-syntax">(</span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">meaning_code</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">Node::set_meaning</span><span class="plain-syntax">(</span><span class="identifier-syntax">pn</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">pn</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP12" class="paragraph-anchor"></a><b>§12. Registration. </b>The following is the main routine used throughout Inform to register new
|
|
meanings.
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="function-syntax">ExcerptMeanings::register</span><button class="popup" onclick="togglePopup('usagePopup8')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup8">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::register</span></span>:<br/>Lexicon - <a href="1-lxc.html#SP1">§1</a></span></button><span class="plain-syntax">(</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">unsigned</span><span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">, </span><span class="identifier-syntax">wording</span><span class="plain-syntax"> </span><span class="identifier-syntax">W</span><span class="plain-syntax">, </span><span class="identifier-syntax">general_pointer</span><span class="plain-syntax"> </span><span class="identifier-syntax">data</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Wordings::empty</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">)) </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"tried to register empty excerpt meaning"</span><span class="plain-syntax">);</span>
|
|
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">CORE_MODULE</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax"> == </span><span class="constant-syntax">NOUN_MC</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOOP_THROUGH_WORDING</span><span class="plain-syntax">(</span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">W</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">NTI::mark_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="function-syntax">&lt;s-object-instance&gt;</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax"> == </span><span class="identifier-syntax">KIND_SLOW_MC</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOOP_THROUGH_WORDING</span><span class="plain-syntax">(</span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">W</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">NTI::mark_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="function-syntax">&lt;k-kind&gt;</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">excerpt_meaning</span><span class="plain-syntax"> *</span><span class="identifier-syntax">em</span><span class="plain-syntax"> = </span><a href="2-em.html#SP5" class="function-link"><span class="function-syntax">ExcerptMeanings::new</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">, </span><span class="identifier-syntax">data</span><span class="plain-syntax">);</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP12_1" class="named-paragraph-link"><span class="named-paragraph">Unless this is parametrised, skip any initial article</span><span class="named-paragraph-number">12.1</span></a></span><span class="plain-syntax">;</span>
|
|
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">EM_CASE_SENSITIVITY_TEST_LEXICON_CALLBACK</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">EM_CASE_SENSITIVITY_TEST_LEXICON_CALLBACK</span><span class="plain-syntax">(</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">))</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP12_2" class="named-paragraph-link"><span class="named-paragraph">Detect use of upper case on the first word of this new text substitution</span><span class="named-paragraph-number">12.2</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP12_3" class="named-paragraph-link"><span class="named-paragraph">Build the token list for the new EM</span><span class="named-paragraph-number">12.3</span></a></span><span class="plain-syntax">;</span>
|
|
|
|
<span class="plain-syntax"> </span><a href="2-em.html#SP10" class="function-link"><span class="function-syntax">ExcerptMeanings::register_em</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">, </span><span class="identifier-syntax">em</span><span class="plain-syntax">);</span>
|
|
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP12_1" class="paragraph-anchor"></a><b>§12.1. </b>Articles are preserved at the front of phrase definitions, mainly because
|
|
text substitutions need to distinguish (for instance) "say [the X]" from
|
|
"say [an X]".
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Unless this is parametrised, skip any initial article</span><span class="named-paragraph-number">12.1</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax"> & </span><span class="constant-syntax">PARAMETRISED_PARSING_BITMAP</span><span class="plain-syntax">) == </span><span class="constant-syntax">0</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">NTI::test_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">), </span><span class="function-syntax">&lt;article&gt;</span><span class="plain-syntax">)) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">W</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Wordings::trim_first_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Wordings::empty</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">))</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"registered a meaning which was only an article"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP12">§12</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP12_2" class="paragraph-anchor"></a><b>§12.2. </b>Because an open bracket fails <span class="extract"><span class="extract-syntax">isupper</span></span>, the following looks at the first
|
|
letter of the first word only if it's not a blank. If it finds upper case, as
|
|
it would when reading the "T" in:
|
|
</p>
|
|
|
|
<blockquote>
|
|
<p>To say The Portrait: ...</p>
|
|
</blockquote>
|
|
|
|
<p class="commentary">then it makes a new upper-case version of the word "the", i.e., "The",
|
|
with a distinct lexical identity; and places this distinguished identity as
|
|
the new first token. This ensures that we end up with a different token list
|
|
from the one in:
|
|
</p>
|
|
|
|
<blockquote>
|
|
<p>To say the Portrait: ...</p>
|
|
</blockquote>
|
|
|
|
<p class="commentary">(These are the only circumstances in which phrase parsing has any case
|
|
sensitivity.)
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Detect use of upper case on the first word of this new text substitution</span><span class="named-paragraph-number">12.2</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">wchar_t</span><span class="plain-syntax"> *</span><span class="identifier-syntax">tx</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Lexer::word_raw_text</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">));</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">tx</span><span class="plain-syntax">[0]) && ((</span><span class="identifier-syntax">isupper</span><span class="plain-syntax">(</span><span class="identifier-syntax">tx</span><span class="plain-syntax">[0])) || (</span><span class="identifier-syntax">tx</span><span class="plain-syntax">[1] == </span><span class="constant-syntax">0</span><span class="plain-syntax">))) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">vocabulary_entry</span><span class="plain-syntax"> *</span><span class="identifier-syntax">ucf</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Vocabulary::make_case_sensitive</span><span class="plain-syntax">(</span><span class="identifier-syntax">Lexer::word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">)));</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (!</span><span class="identifier-syntax">Characters::isupper</span><span class="plain-syntax">(</span><span class="identifier-syntax">tx</span><span class="plain-syntax">[0])) </span><span class="identifier-syntax">ucf</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Vocabulary::get_lower_case_form</span><span class="plain-syntax">(</span><span class="identifier-syntax">ucf</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">Lexer::set_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">), </span><span class="identifier-syntax">ucf</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOGIF</span><span class="plain-syntax">(</span><span class="identifier-syntax">EXCERPT_MEANINGS</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> </span><span class="string-syntax">"Allowing initial capitalised word %w: meaning_code = %08x\n"</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">tx</span><span class="plain-syntax">, </span><span class="identifier-syntax">meaning_code</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP12">§12</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP12_3" class="paragraph-anchor"></a><b>§12.3. </b>We read the text in something like:
|
|
</p>
|
|
|
|
<blockquote>
|
|
<p>award (P - a number) points</p>
|
|
</blockquote>
|
|
|
|
<p class="commentary">and transcribe it into the token list, collapsing bracketed parts into <span class="extract"><span class="extract-syntax">#</span></span>
|
|
tokens denoting gaps, to result in something like:
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> award # points</span>
|
|
</pre>
|
|
<p class="commentary">with a token count of 3.
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Build the token list for the new EM</span><span class="named-paragraph-number">12.3</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">tc</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">i</span><span class="plain-syntax"> &lt; </span><span class="identifier-syntax">Wordings::length</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">); </span><span class="identifier-syntax">i</span><span class="plain-syntax">++) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">tc</span><span class="plain-syntax"> >= </span><span class="constant-syntax">MAX_TOKENS_PER_EXCERPT_MEANING</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP12_3_3" class="named-paragraph-link"><span class="named-paragraph">Complain of excessive length of the new excerpt</span><span class="named-paragraph-number">12.3.3</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">break</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">compare_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">) + </span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">OPENBRACKET_V</span><span class="plain-syntax">)) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">tc</span><span class="plain-syntax">++] = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="2-em.html#SP12_3_1" class="named-paragraph-link"><span class="named-paragraph">Skip over bracketed token description</span><span class="named-paragraph-number">12.3.1</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> } </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">em_tokens</span><span class="plain-syntax">[</span><span class="identifier-syntax">tc</span><span class="plain-syntax">++] = </span><span class="identifier-syntax">Lexer::word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">) + </span><span class="identifier-syntax">i</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">em</span><span class="plain-syntax">-></span><span class="element-syntax">no_em_tokens</span><span class="plain-syntax"> = </span><span class="identifier-syntax">tc</span><span class="plain-syntax">;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP12">§12</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP12_3_1" class="paragraph-anchor"></a><b>§12.3.1. </b>This is all a little defensive, but syntax bugs higher up tend to find
|
|
their way down to this plughole:
|
|
</p>
|
|
|
|
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Skip over bracketed token description</span><span class="named-paragraph-number">12.3.1</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">bl</span><span class="plain-syntax"> = </span><span class="constant-syntax">1</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">while</span><span class="plain-syntax"> (</span><span class="identifier-syntax">bl</span><span class="plain-syntax"> > </span><span class="constant-syntax">0</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">i</span><span class="plain-syntax"> >= </span><span class="identifier-syntax">Wordings::length</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">)) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOG</span><span class="plain-syntax">(</span><span class="string-syntax">"Bad meaning: &lt;%W&gt;\n"</span><span class="plain-syntax">, </span><span class="identifier-syntax">W</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"Bracket mismatch when registering"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">compare_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">) + </span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">OPENBRACKET_V</span><span class="plain-syntax">)) </span><span class="identifier-syntax">bl</span><span class="plain-syntax">++;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">compare_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">) + </span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">CLOSEBRACKET_V</span><span class="plain-syntax">)) </span><span class="identifier-syntax">bl</span><span class="plain-syntax">--;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">++;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">i</span><span class="plain-syntax"> &lt; </span><span class="identifier-syntax">Wordings::length</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">)) &amp;&amp; (</span><span class="identifier-syntax">compare_word</span><span class="plain-syntax">(</span><span class="identifier-syntax">Wordings::first_wn</span><span class="plain-syntax">(</span><span class="identifier-syntax">W</span><span class="plain-syntax">) + </span><span class="identifier-syntax">i</span><span class="plain-syntax">, </span><span class="identifier-syntax">OPENBRACKET_V</span><span class="plain-syntax">))) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOG</span><span class="plain-syntax">(</span><span class="string-syntax">"Bad meaning: &lt;%W&gt;\n"</span><span class="plain-syntax">, </span><span class="identifier-syntax">W</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"Two consecutive bracketed tokens when registering"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">--;</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP12_3">§12.3</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP12_3_2" class="paragraph-anchor"></a><b>§12.3.2. </b>In practice, nobody ever hits this message except deliberately. It has
|
|
a tendency to fire twice or more on the same source text because of
|
|
registering multiple inflected forms of the same text; but it's not worth
|
|
going to any trouble to prevent this.
|
|
</p>
|
|
|
|
<p class="commentary">(At present, this is actually the only lexicon error.)
|
|
</p>
|
|
|
|
<pre class="definitions code-font"><span class="definition-keyword">enum</span> <span class="constant-syntax">TooLongName_LEXICONERROR</span><span class="plain-syntax"> </span><span class="identifier-syntax">from</span><span class="plain-syntax"> </span><span class="constant-syntax">1</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP12_3_3" class="paragraph-anchor"></a><b>§12.3.3. </b><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Complain of excessive length of the new excerpt</span><span class="named-paragraph-number">12.3.3</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><a href="2-em.html#SP13" class="function-link"><span class="function-syntax">ExcerptMeanings::problem_handler</span></a><span class="plain-syntax">(</span><span class="constant-syntax">TooLongName_LEXICONERROR</span><span class="plain-syntax">, </span><span class="identifier-syntax">EMPTY_WORDING</span><span class="plain-syntax">, </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">, </span><span class="constant-syntax">0</span><span class="plain-syntax">);</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="2-em.html#SP12_3">§12.3</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP13" class="paragraph-anchor"></a><b>§13. Errors. </b>Some tools using this module will want to push simple error messages out to
|
|
the command line; others will want to translate them into elaborate problem
|
|
texts in HTML. So the client is allowed to define <span class="extract"><span class="extract-syntax">PROBLEM_LEXICON_CALLBACK</span></span>
|
|
to some routine of her own, gazumping this one.
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">void</span><span class="plain-syntax"> </span><span class="function-syntax">ExcerptMeanings::problem_handler</span><button class="popup" onclick="togglePopup('usagePopup9')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup9">Usage of <span class="code-font"><span class="function-syntax">ExcerptMeanings::problem_handler</span></span>:<br/><a href="2-em.html#SP12_3_3">§12.3.3</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">err_no</span><span class="plain-syntax">, </span><span class="identifier-syntax">wording</span><span class="plain-syntax"> </span><span class="identifier-syntax">W</span><span class="plain-syntax">, </span><span class="reserved-syntax">void</span><span class="plain-syntax"> *</span><span class="identifier-syntax">ref</span><span class="plain-syntax">, </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">k</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifdef</span><span class="plain-syntax"> </span><span class="identifier-syntax">PROBLEM_LEXICON_CALLBACK</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">PROBLEM_LEXICON_CALLBACK</span><span class="plain-syntax">(</span><span class="identifier-syntax">err_no</span><span class="plain-syntax">, </span><span class="identifier-syntax">W</span><span class="plain-syntax">, </span><span class="identifier-syntax">ref</span><span class="plain-syntax">, </span><span class="identifier-syntax">k</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">ifndef</span><span class="plain-syntax"> </span><span class="identifier-syntax">PROBLEM_LEXICON_CALLBACK</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">TEMPORARY_TEXT</span><span class="plain-syntax">(</span><span class="identifier-syntax">text</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">WRITE_TO</span><span class="plain-syntax">(</span><span class="identifier-syntax">text</span><span class="plain-syntax">, </span><span class="string-syntax">"%+W"</span><span class="plain-syntax">, </span><span class="identifier-syntax">W</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">switch</span><span class="plain-syntax"> (</span><span class="identifier-syntax">err_no</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">case</span><span class="plain-syntax"> </span><span class="identifier-syntax">TooLongName_LEXICONERROR:</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">Errors::nowhere</span><span class="plain-syntax">(</span><span class="string-syntax">"noun too long"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">break</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">DISCARD_TEXT</span><span class="plain-syntax">(</span><span class="identifier-syntax">text</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> #</span><span class="identifier-syntax">endif</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<nav role="progress"><div class="progresscontainer">
|
|
<ul class="progressbar"><li class="progressprev"><a href="1-lxc.html">❮</a></li><li class="progresschapter"><a href="P-wtmd.html">P</a></li><li class="progresschapter"><a href="1-lm.html">1</a></li><li class="progresscurrentchapter">2</li><li class="progresscurrent">em</li><li class="progresssection"><a href="2-pe.html">pe</a></li><li class="progressnext"><a href="2-pe.html">❯</a></li></ul></div>
|
|
</nav><!--End of weave-->
|
|
|
|
</main>
|
|
</body>
|
|
</html>
|
|
|