1
0
Fork 0
mirror of https://github.com/ganelson/inform.git synced 2024-07-09 10:34:22 +03:00
inform7/docs/core-module/5-ut.html

206 lines
17 KiB
HTML
Raw Normal View History

2019-03-17 14:40:57 +02:00
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en-GB">
<head>
<title>5/ut</title>
2020-03-19 02:11:25 +02:00
<meta name="viewport" content="width=device-width, initial-scale=1">
2019-03-17 14:40:57 +02:00
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="Content-Language" content="en-gb">
2020-03-19 02:11:25 +02:00
<link href="../inweb.css" rel="stylesheet">
2019-03-17 14:40:57 +02:00
</head>
<body>
2020-03-19 02:11:25 +02:00
<nav role="navigation">
<h1><a href="../webs.html">Sources</a></h1>
<ul>
<li><a href="../compiler.html"><b>compiler tools</b></a></li>
2020-03-19 02:11:25 +02:00
<li><a href="../other.html">other tools</a></li>
<li><a href="../extensions.html">extensions and kits</a></li>
<li><a href="../units.html">unit test tools</a></li>
</ul>
<h2>Compiler Webs</h2>
<ul>
<li><a href="../inbuild/index.html">inbuild</a></li>
<li><a href="../inform7/index.html">inform7</a></li>
<li><a href="../inter/index.html">inter</a></li>
</ul>
<h2>Inbuild Modules</h2>
<ul>
<li><a href="../inbuild-module/index.html">inbuild</a></li>
<li><a href="../arch-module/index.html">arch</a></li>
<li><a href="../words-module/index.html">words</a></li>
<li><a href="../syntax-module/index.html">syntax</a></li>
<li><a href="../html-module/index.html">html</a></li>
</ul>
<h2>Inform7 Modules</h2>
<ul>
<li><a href="../core-module/index.html">core</a></li>
<li><a href="../problems-module/index.html">problems</a></li>
<li><a href="../inflections-module/index.html">inflections</a></li>
<li><a href="../linguistics-module/index.html">linguistics</a></li>
<li><a href="../kinds-module/index.html">kinds</a></li>
<li><a href="../if-module/index.html">if</a></li>
<li><a href="../multimedia-module/index.html">multimedia</a></li>
<li><a href="../index-module/index.html">index</a></li>
</ul>
<h2>Inter Modules</h2>
<ul>
<li><a href="../inter-module/index.html">inter</a></li>
<li><a href="../building-module/index.html">building</a></li>
<li><a href="../codegen-module/index.html">codegen</a></li>
</ul>
<h2>Foundation</h2>
<ul>
<li><a href="../../../inweb/docs/foundation-module/index.html">foundation</a></li>
</ul>
</nav>
<main role="main">
2019-03-17 14:40:57 +02:00
<!--Weave of '5/ut' generated by 7-->
2020-03-22 12:50:19 +02:00
<ul class="crumbs"><li><a href="../webs.html">Source</a></li><li><a href="../compiler.html">Compiler Modules</a></li><li><a href="index.html">core</a></li><li><a href="index.html#5">Chapter 5: Nouns</a></li><li><b>Unicode Translations</b></li></ul><p class="purpose">To manage the names assigned to Unicode character values.</p>
2019-03-17 14:40:57 +02:00
<p class="inwebparagraph"><a id="SP1"></a><b>&#167;1. </b>There are no data structures here; Unicode names are simply a category of
excerpt meanings, so we read a "translates into Unicode as" sentence as
a new name and its meaning to be.
</p>
<pre class="display">
<span class="reserved">void</span><span class="plain"> </span><span class="functiontext">UnicodeTranslations::unicode_translates</span><span class="plain">(</span><span class="identifier">parse_node</span><span class="plain"> *</span><span class="identifier">pn</span><span class="plain">) {</span>
2020-04-07 03:06:09 +03:00
<span class="reserved">if</span><span class="plain"> (&lt;</span><span class="identifier">translates</span><span class="plain">-</span><span class="identifier">into</span><span class="plain">-</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">sentence</span><span class="plain">-</span><span class="identifier">object</span><span class="plain">&gt;(</span><span class="identifier">ParseTree::get_text</span><span class="plain">(</span><span class="identifier">pn</span><span class="plain">-&gt;</span><span class="element">next</span><span class="plain">-&gt;</span><span class="element">next</span><span class="plain">)) == </span><span class="identifier">FALSE</span><span class="plain">) </span><span class="reserved">return</span><span class="plain">;</span>
2019-03-17 14:40:57 +02:00
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">cc</span><span class="plain"> = &lt;&lt;</span><span class="identifier">r</span><span class="plain">&gt;&gt;;</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">UnicodeTranslations::char_in_range</span><span class="plain">(</span><span class="identifier">cc</span><span class="plain">) == </span><span class="identifier">FALSE</span><span class="plain">) </span><span class="reserved">return</span><span class="plain">;</span>
2020-04-07 03:06:09 +03:00
<span class="plain">&lt;</span><span class="identifier">translates</span><span class="plain">-</span><span class="identifier">into</span><span class="plain">-</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">sentence</span><span class="plain">-</span><span class="identifier">subject</span><span class="plain">&gt;(</span><span class="identifier">ParseTree::get_text</span><span class="plain">(</span><span class="identifier">pn</span><span class="plain">-&gt;</span><span class="element">next</span><span class="plain">));</span>
2019-03-17 14:40:57 +02:00
<span class="reserved">if</span><span class="plain"> ((&lt;&lt;</span><span class="identifier">r</span><span class="plain">&gt;&gt; != -1) &amp;&amp; (&lt;&lt;</span><span class="identifier">r</span><span class="plain">&gt;&gt; != </span><span class="identifier">cc</span><span class="plain">)) {</span>
2020-03-11 02:21:09 +02:00
<span class="identifier">Problems::Issue::sentence_problem</span><span class="plain">(</span><span class="functiontext">Task::syntax_tree</span><span class="plain">(), </span><span class="identifier">_p_</span><span class="plain">(</span><span class="identifier">PM_UnicodeAlready</span><span class="plain">),</span>
2019-03-17 14:40:57 +02:00
<span class="string">"this Unicode character name has already been translated"</span><span class="plain">,</span>
<span class="string">"so there must be some duplication somewhere."</span><span class="plain">);</span>
<span class="reserved">return</span><span class="plain">;</span>
<span class="plain">}</span>
2020-04-07 03:06:09 +03:00
<span class="identifier">Nouns::new_proper_noun</span><span class="plain">(</span><span class="identifier">ParseTree::get_text</span><span class="plain">(</span><span class="identifier">pn</span><span class="plain">-&gt;</span><span class="element">next</span><span class="plain">), </span><span class="identifier">NEUTER_GENDER</span><span class="plain">,</span>
2019-03-17 14:40:57 +02:00
<span class="identifier">REGISTER_SINGULAR_NTOPT</span><span class="plain"> + </span><span class="identifier">PARSE_EXACTLY_NTOPT</span><span class="plain">,</span>
<span class="identifier">MISCELLANEOUS_MC</span><span class="plain">,</span>
2020-04-07 03:06:09 +03:00
<span class="identifier">NounPhrases::new_raw</span><span class="plain">(</span><span class="identifier">ParseTree::get_text</span><span class="plain">(</span><span class="identifier">pn</span><span class="plain">-&gt;</span><span class="element">next</span><span class="plain">-&gt;</span><span class="element">next</span><span class="plain">)));</span>
2019-03-17 14:40:57 +02:00
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function UnicodeTranslations::unicode_translates is used in 7/ns (<a href="7-ns.html#SP18">&#167;18</a>).</p>
<p class="inwebparagraph"><a id="SP2"></a><b>&#167;2. </b>The following parses the subject noun phrase of sentences like
</p>
<blockquote>
<p>leftwards harpoon with barb upwards translates into Unicode as 8636.</p>
</blockquote>
<p class="inwebparagraph">The subject "leftwards harpoon with barb upwards" is parsed against the
Unicode character names known already to make sure that this new translation
doesn't disagree with an existing one (that is, doesn't translate to a
different code number).
</p>
<pre class="display">
<span class="plain">&lt;</span><span class="identifier">translates</span><span class="plain">-</span><span class="identifier">into</span><span class="plain">-</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">sentence</span><span class="plain">-</span><span class="identifier">subject</span><span class="plain">&gt; ::=</span>
<span class="plain">&lt;</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">character</span><span class="plain">-</span><span class="identifier">name</span><span class="plain">&gt; | ==&gt; </span><span class="identifier">R</span><span class="plain">[1]</span>
<span class="plain">... ==&gt; -1</span>
</pre>
<p class="inwebparagraph"></p>
<p class="inwebparagraph"><a id="SP3"></a><b>&#167;3. </b>And this parses the object noun phrase of such sentences &mdash; a decimal
number. I was tempted to allow hexadecimal here, but life's too short.
Unicode translation sentences are really only technicalities needed by
the built-in extensions anyway; Inform authors never type them.
</p>
<pre class="display">
<span class="plain">&lt;</span><span class="identifier">translates</span><span class="plain">-</span><span class="identifier">into</span><span class="plain">-</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">sentence</span><span class="plain">-</span><span class="identifier">object</span><span class="plain">&gt; ::=</span>
<span class="plain">&lt;</span><span class="identifier">cardinal</span><span class="plain">-</span><span class="identifier">number</span><span class="plain">-</span><span class="identifier">unlimited</span><span class="plain">&gt; | ==&gt; </span><span class="identifier">R</span><span class="plain">[1]</span>
<span class="plain">... ==&gt; </span>&lt;<span class="cwebmacro">Issue PM_UnicodeNonLiteral problem</span> <span class="cwebmacronumber">3.1</span>&gt;
</pre>
<p class="inwebparagraph"></p>
<p class="inwebparagraph"><a id="SP3_1"></a><b>&#167;3.1. </b><code class="display">
&lt;<span class="cwebmacrodefn">Issue PM_UnicodeNonLiteral problem</span> <span class="cwebmacronumber">3.1</span>&gt; =
</code></p>
<pre class="displaydefn">
2020-03-11 02:21:09 +02:00
<span class="identifier">Problems::Issue::sentence_problem</span><span class="plain">(</span><span class="functiontext">Task::syntax_tree</span><span class="plain">(), </span><span class="identifier">_p_</span><span class="plain">(</span><span class="identifier">PM_UnicodeNonLiteral</span><span class="plain">),</span>
2019-03-17 14:40:57 +02:00
<span class="string">"a Unicode character name must be translated into a literal decimal "</span>
<span class="string">"number written out in digits"</span><span class="plain">,</span>
<span class="string">"which this seems not to be."</span><span class="plain">);</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">FALSE</span><span class="plain">;</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">This code is used in <a href="#SP3">&#167;3</a>.</p>
<p class="inwebparagraph"><a id="SP4"></a><b>&#167;4. </b>The following is called only on excerpts from the source where it is a
fairly safe bet that a Unicode character is referred to. For example, when
the author types either of these:
</p>
<blockquote>
<p>"[unicode 321]odz Churchyard"</p>
</blockquote>
<blockquote>
<p>"[unicode Latin capital letter L with stroke]odz Churchyard"</p>
</blockquote>
<p class="inwebparagraph">...then the text after the word "unicode" is parsed by &lt;s-unicode-character&gt;.
</p>
<pre class="display">
<span class="plain">&lt;</span><span class="identifier">s</span><span class="plain">-</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">character</span><span class="plain">&gt; ::=</span>
<span class="plain">&lt;</span><span class="identifier">cardinal</span><span class="plain">-</span><span class="identifier">number</span><span class="plain">-</span><span class="identifier">unlimited</span><span class="plain">&gt; | ==&gt; </span><span class="functiontext">Rvalues::from_Unicode_point</span><span class="plain">(</span><span class="identifier">R</span><span class="plain">[1], </span><span class="identifier">W</span><span class="plain">); </span><span class="reserved">if</span><span class="plain"> (!(</span><span class="functiontext">UnicodeTranslations::char_in_range</span><span class="plain">(</span><span class="identifier">R</span><span class="plain">[1]))) </span><span class="reserved">return</span><span class="plain"> </span><span class="identifier">FALSE</span><span class="plain">;</span>
<span class="plain">&lt;</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">character</span><span class="plain">-</span><span class="identifier">name</span><span class="plain">&gt; ==&gt; </span><span class="functiontext">Rvalues::from_Unicode_point</span><span class="plain">(</span><span class="identifier">R</span><span class="plain">[1], </span><span class="identifier">W</span><span class="plain">)</span>
<span class="plain">&lt;</span><span class="identifier">unicode</span><span class="plain">-</span><span class="identifier">character</span><span class="plain">-</span><span class="identifier">name</span><span class="plain">&gt; </span><span class="identifier">internal</span><span class="plain"> {</span>
<span class="identifier">parse_node</span><span class="plain"> *</span><span class="identifier">p</span><span class="plain"> = </span><span class="identifier">ExParser::parse_excerpt</span><span class="plain">(</span><span class="identifier">MISCELLANEOUS_MC</span><span class="plain">, </span><span class="identifier">W</span><span class="plain">);</span>
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">p</span><span class="plain">) &amp;&amp; (</span><span class="identifier">ParseTree::get_type</span><span class="plain">(</span><span class="identifier">p</span><span class="plain">) == </span><span class="identifier">PROPER_NOUN_NT</span><span class="plain">)) {</span>
<span class="plain">*</span><span class="identifier">X</span><span class="plain"> = </span><span class="identifier">Vocabulary::get_literal_number_value</span><span class="plain">(</span><span class="identifier">Lexer::word</span><span class="plain">(</span><span class="identifier">Wordings::first_wn</span><span class="plain">(</span><span class="identifier">ParseTree::get_text</span><span class="plain">(</span><span class="identifier">p</span><span class="plain">))));</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">TRUE</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">FALSE</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="inwebparagraph"><a id="SP5"></a><b>&#167;5. </b>And here is the range check:
</p>
<pre class="display">
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">UnicodeTranslations::char_in_range</span><span class="plain">(</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">cc</span><span class="plain">) {</span>
2020-04-07 03:06:09 +03:00
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">cc</span><span class="plain"> &lt; </span><span class="constant">0</span><span class="plain">) || (</span><span class="identifier">cc</span><span class="plain"> &gt;= </span><span class="constant">0x10000</span><span class="plain">)) {</span>
2020-03-11 02:21:09 +02:00
<span class="identifier">Problems::Issue::sentence_problem</span><span class="plain">(</span><span class="functiontext">Task::syntax_tree</span><span class="plain">(), </span><span class="identifier">_p_</span><span class="plain">(</span><span class="identifier">PM_UnicodeOutOfRange</span><span class="plain">),</span>
2019-03-17 14:40:57 +02:00
<span class="string">"Inform can only handle Unicode characters in the 16-bit range"</span><span class="plain">,</span>
<span class="string">"from 0 to 65535."</span><span class="plain">);</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">FALSE</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">TRUE</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function UnicodeTranslations::char_in_range is used in <a href="#SP1">&#167;1</a>, <a href="#SP4">&#167;4</a>.</p>
<hr class="tocbar">
<ul class="toc"><li><a href="5-uem.html">Back to 'Using Excerpt Meanings'</a></li><li><a href="5-un.html">Continue with 'Using Nametags'</a></li></ul><hr class="tocbar">
2019-04-22 17:42:10 +03:00
<!--End of weave-->
2020-03-19 02:11:25 +02:00
</main>
2019-03-17 14:40:57 +02:00
</body>
</html>