mirror of
https://github.com/ganelson/inform.git
synced 2024-07-08 01:54:21 +03:00
465 lines
92 KiB
HTML
465 lines
92 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<html>
|
|
<head>
|
|
<title>Unicode Literals</title>
|
|
<link href="../docs-assets/Breadcrumbs.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
<meta http-equiv="Content-Language" content="en-gb">
|
|
|
|
<link href="../docs-assets/Contents.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Progress.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Navigation.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Fonts.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Base.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<script>
|
|
/**
 * Show or hide a popup by toggling the "show" class on its element.
 *
 * Called from the onclick handlers of the "?" usage buttons on this page.
 *
 * @param {string} material_id - id attribute of the popup element to toggle.
 */
function togglePopup(material_id) {
	var popup = document.getElementById(material_id);
	// getElementById returns null for an unknown id: do nothing rather than
	// throw a TypeError from inside a click handler.
	if (popup === null) return;
	popup.classList.toggle("show");
}
|
|
</script>
|
|
|
|
<link href="../docs-assets/Popups.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Colours.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
<link href="../docs-assets/Preform-Colours.css" rel="stylesheet" rev="stylesheet" type="text/css">
|
|
|
|
</head>
|
|
<body class="commentary-font">
|
|
<nav role="navigation">
|
|
<h1><a href="../index.html">
|
|
<img src="../docs-assets/Inform.png" height="72" alt="Inform">
|
|
</a></h1>
|
|
<ul><li><a href="../index.html">home</a></li>
|
|
</ul><h2>Compiler</h2><ul>
|
|
<li><a href="../structure.html">structure</a></li>
|
|
<li><a href="../inbuildn.html">inbuild</a></li>
|
|
<li><a href="../inform7n.html">inform7</a></li>
|
|
<li><a href="../intern.html">inter</a></li>
|
|
<li><a href="../services.html">services</a></li>
|
|
<li><a href="../secrets.html">secrets</a></li>
|
|
</ul><h2>Other Tools</h2><ul>
|
|
<li><a href="../inblorbn.html">inblorb</a></li>
|
|
<li><a href="../indocn.html">indoc</a></li>
|
|
<li><a href="../inform6.html">inform6</a></li>
|
|
<li><a href="../inpolicyn.html">inpolicy</a></li>
|
|
<li><a href="../inrtpsn.html">inrtps</a></li>
|
|
</ul><h2>Resources</h2><ul>
|
|
<li><a href="../extensions.html">extensions</a></li>
|
|
<li><a href="../kits.html">kits</a></li>
|
|
</ul><h2>Repository</h2><ul>
|
|
<li><a href="https://github.com/ganelson/inform"><img src="../docs-assets/github.png" height="18" alt=""> github</a></li>
|
|
</ul><h2>Related Projects</h2><ul>
|
|
<li><a href="../../../inweb/index.html">inweb</a></li>
|
|
<li><a href="../../../intest/index.html">intest</a></li>
|
|
|
|
</ul>
|
|
</nav>
|
|
<main role="main">
|
|
<!--Weave of 'Unicode Literals' generated by Inweb-->
|
|
<div class="breadcrumbs">
|
|
<ul class="crumbs"><li><a href="../index.html">Home</a></li><li><a href="../inform7n.html">Inform7</a></li><li><a href="index.html">values</a></li><li><a href="index.html#3">Chapter 3: Literals</a></li><li><b>Unicode Literals</b></li></ul></div>
|
|
<p class="purpose">To manage the names assigned to Unicode character values.</p>
|
|
|
|
<ul class="toc"><li><a href="3-ul.html#SP1">§1. Parsing</a></li><li><a href="3-ul.html#SP2">§2. Code points</a></li><li><a href="3-ul.html#SP6">§6. Using the Unicode data</a></li></ul><hr class="tocbar">
|
|
|
|
<p class="commentary firstcommentary"><a id="SP1" class="paragraph-anchor"></a><b>§1. Parsing. </b>The following is called only on excerpts from the source where it is a
|
|
fairly safe bet that a Unicode character is referred to. For example, when
|
|
the source text contains either of these:
|
|
</p>
|
|
|
|
<blockquote>
|
|
<p>"[unicode 321]odz Churchyard"</p>
|
|
</blockquote>
|
|
|
|
<blockquote>
|
|
<p>"[unicode Latin capital letter L with stroke]odz Churchyard"</p>
|
|
</blockquote>
|
|
|
|
<p class="commentary">...then the text after the word "unicode" is parsed by &lt;s-unicode-character&gt;.
|
|
</p>
|
|
|
|
<pre class="Preform-displayed-code all-displayed-code code-font">
|
|
<span class="Preform-function-syntax">&lt;s-unicode-character&gt;</span><span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">::=</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-function-syntax">&lt;cardinal-number-unlimited&gt;</span><span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">|</span><span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">==&gt;</span><span class="Preform-plain-syntax"> { -, Rvalues::from_Unicode(UnicodeLiterals::max(R[1]), W) }</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-function-syntax">&lt;unicode-character-name&gt;</span><span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">==&gt;</span><span class="Preform-plain-syntax"> { -, Rvalues::from_Unicode(R[1], W) }</span>
|
|
|
|
<span class="Preform-function-syntax">&lt;unicode-character-name&gt;</span><span class="Preform-plain-syntax"> </span><span class="Preform-constant-syntax">internal</span><span class="Preform-plain-syntax"> </span><span class="Preform-constant-syntax">{</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">TEMPORARY_TEXT</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">N</span><span class="Preform-plain-syntax">)</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">WRITE_TO</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">N</span><span class="Preform-plain-syntax">, </span><span class="Preform-string-syntax">"%W"</span><span class="Preform-plain-syntax">, </span><span class="Preform-identifier-syntax">W</span><span class="Preform-plain-syntax">);</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">for</span><span class="Preform-plain-syntax"> (</span><span class="Preform-reserved-syntax">int</span><span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">i</span><span class="Preform-plain-syntax">=0; </span><span class="Preform-identifier-syntax">i</span><span class="Preform-plain-syntax">&lt;</span><span class="Preform-identifier-syntax">Str::len</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">N</span><span class="Preform-plain-syntax">); </span><span class="Preform-identifier-syntax">i</span><span class="Preform-plain-syntax">++)</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">Str::put_at</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">N</span><span class="Preform-plain-syntax">, </span><span class="Preform-identifier-syntax">i</span><span class="Preform-plain-syntax">, </span><span class="Preform-identifier-syntax">Characters::toupper</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">Str::get_at</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">N</span><span class="Preform-plain-syntax">, </span><span class="Preform-identifier-syntax">i</span><span class="Preform-plain-syntax">)));</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">int</span><span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">U</span><span class="Preform-plain-syntax"> = </span><a href="3-ul.html#SP6" class="function-link"><span class="Preform-function-syntax">UnicodeLiterals::parse</span></a><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">N</span><span class="Preform-plain-syntax">);</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">DISCARD_TEXT</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">N</span><span class="Preform-plain-syntax">)</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">if</span><span class="Preform-plain-syntax"> (</span><span class="Preform-identifier-syntax">U</span><span class="Preform-plain-syntax"> >= </span><span class="Preform-constant-syntax">0</span><span class="Preform-plain-syntax">) {</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">if</span><span class="Preform-plain-syntax"> ((</span><span class="Preform-identifier-syntax">TargetVMs::is_16_bit</span><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">Task::vm</span><span class="Preform-plain-syntax">())) && (</span><span class="Preform-identifier-syntax">U</span><span class="Preform-plain-syntax"> >= </span><span class="Preform-constant-syntax">0x10000</span><span class="Preform-plain-syntax">)) {</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="3-ul.html#SP1_2" class="named-paragraph-link"><span class="named-paragraph">Issue PM_UnicodeOutOfRange</span><span class="named-paragraph-number">1.2</span></a></span><span class="Preform-plain-syntax">;</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">U</span><span class="Preform-plain-syntax"> = </span><span class="Preform-constant-syntax">65</span><span class="Preform-plain-syntax">;</span>
|
|
<span class="Preform-plain-syntax"> }</span>
|
|
<span class="Preform-plain-syntax"> ==> { </span><a href="3-ul.html#SP1_1" class="function-link"><span class="Preform-function-syntax">UnicodeLiterals::max</span></a><span class="Preform-plain-syntax">(</span><span class="Preform-identifier-syntax">U</span><span class="Preform-plain-syntax">), - };</span>
|
|
<span class="Preform-plain-syntax"> </span><span class="Preform-reserved-syntax">return</span><span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">TRUE</span><span class="Preform-plain-syntax">;</span>
|
|
<span class="Preform-plain-syntax"> }</span>
|
|
<span class="Preform-plain-syntax"> ==> { </span><span class="Preform-identifier-syntax">fail</span><span class="Preform-plain-syntax"> </span><span class="Preform-identifier-syntax">nonterminal</span><span class="Preform-plain-syntax"> };</span>
|
|
<span class="Preform-plain-syntax">}</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This is <a href="../words-module/4-ap.html" class="internal">Preform grammar</a>, not regular C code.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP1_1" class="paragraph-anchor"></a><b>§1.1. </b>And here is the range check. Values above <span class="extract"><span class="Preform-extract-syntax">MAX_UNICODE_CODE_POINT</span></span> are
|
|
permitted, but need to be specified numerically.
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::max</span><button class="popup" onclick="togglePopup('usagePopup1')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup1">Usage of <span class="code-font"><span class="function-syntax">UnicodeLiterals::max</span></span>:<br/><a href="3-ul.html#SP1">§1</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">cc</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">cc</span><span class="plain-syntax"> &lt; </span><span class="constant-syntax">0</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="3-ul.html#SP1_2" class="named-paragraph-link"><span class="named-paragraph">Issue PM_UnicodeOutOfRange</span><span class="named-paragraph-number">1.2</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="constant-syntax">65</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">cc</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP1_2" class="paragraph-anchor"></a><b>§1.2. </b><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Issue PM_UnicodeOutOfRange</span><span class="named-paragraph-number">1.2</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">StandardProblems::sentence_problem</span><span class="plain-syntax">(</span><span class="identifier-syntax">Task::syntax_tree</span><span class="plain-syntax">(), </span><span class="identifier-syntax">_p_</span><span class="plain-syntax">(</span><span class="identifier-syntax">PM_UnicodeOutOfRange</span><span class="plain-syntax">),</span>
|
|
<span class="plain-syntax"> </span><span class="string-syntax">"this character value is beyond the range which the current story "</span>
|
|
<span class="plain-syntax"> </span><span class="string-syntax">"could handle"</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> </span><span class="string-syntax">"which is from 0 to (hexadecimal) FFFF for stories compiled to the "</span>
|
|
<span class="plain-syntax"> </span><span class="string-syntax">"Z-machine, and otherwise 0 to 1FFFF."</span><span class="plain-syntax">);</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="3-ul.html#SP1">§1</a>, <a href="3-ul.html#SP1_1">§1.1</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP2" class="paragraph-anchor"></a><b>§2. Code points. </b>Each distinct code point in the Unicode specification will correspond to one
|
|
of these:
|
|
</p>
|
|
|
|
<pre class="definitions code-font"><span class="definition-keyword">define</span> <span class="constant-syntax">MAX_UNICODE_CODE_POINT</span><span class="plain-syntax"> </span><span class="constant-syntax">0x20000</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Cc_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="identifier-syntax">from</span><span class="plain-syntax"> </span><span class="constant-syntax">1</span><span class="plain-syntax"> </span><span class="comment-syntax"> Other, Control</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Cf_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Other, Format</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Cn_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Other, Not Assigned: no character actually has this</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Co_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Other, Private Use</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Cs_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Other, Surrogate</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Ll_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Letter, Lowercase</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Lm_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Letter, Modifier</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Lo_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Letter, Other</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Lt_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Letter, Titlecase</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Lu_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Letter, Uppercase</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Mc_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Mark, Spacing Combining</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Me_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Mark, Enclosing</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Mn_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Mark, Non-Spacing</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Nd_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Number, Decimal Digit</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Nl_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Number, Letter</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">No_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Number, Other</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Pc_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Punctuation, Connector</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Pd_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Punctuation, Dash</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Pe_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Punctuation, Close</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Pf_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Punctuation, Final quote</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Pi_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Punctuation, Initial quote</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Po_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Punctuation, Other</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Ps_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Punctuation, Open</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Sc_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Symbol, Currency</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Sk_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Symbol, Modifier</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Sm_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Symbol, Math</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">So_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Symbol, Other</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Zl_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Separator, Line</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Zp_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Separator, Paragraph</span>
|
|
<span class="definition-keyword">enum</span> <span class="constant-syntax">Zs_UNICODE_CAT</span><span class="plain-syntax"> </span><span class="comment-syntax"> Separator, Space</span>
|
|
</pre>
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">typedef</span><span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">code_point</span><span class="plain-syntax">; </span><span class="comment-syntax"> in the range 0 to MAX_UNICODE_CODE_POINT - 1</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="identifier-syntax">text_stream</span><span class="plain-syntax"> *</span><span class="identifier-syntax">name</span><span class="plain-syntax">; </span><span class="comment-syntax"> e.g. "RIGHT-FACING ARMENIAN ETERNITY SIGN"</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">category</span><span class="plain-syntax">; </span><span class="comment-syntax"> one of the </span><span class="extract"><span class="extract-syntax">*_UNICODE_CAT</span></span><span class="comment-syntax"> values above</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">tolower</span><span class="plain-syntax">; </span><span class="comment-syntax"> -1 if no mapping to lower case is available, or a code point</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">toupper</span><span class="plain-syntax">; </span><span class="comment-syntax"> -1 if no mapping to upper case is available, or a code point</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">totitle</span><span class="plain-syntax">; </span><span class="comment-syntax"> -1 if no mapping to title case is available, or a code point</span>
|
|
<span class="plain-syntax">} </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax">;</span>
|
|
|
|
<span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::new_code_point</span><button class="popup" onclick="togglePopup('usagePopup2')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup2">Usage of <span class="code-font"><span class="function-syntax">UnicodeLiterals::new_code_point</span></span>:<br/><a href="3-ul.html#SP3">§3</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">.</span><span class="element-syntax">code_point</span><span class="plain-syntax"> = </span><span class="identifier-syntax">C</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">.</span><span class="element-syntax">name</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">.</span><span class="element-syntax">category</span><span class="plain-syntax"> = </span><span class="constant-syntax">Cn_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">.</span><span class="element-syntax">tolower</span><span class="plain-syntax"> = -1;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">.</span><span class="element-syntax">toupper</span><span class="plain-syntax"> = -1;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">.</span><span class="element-syntax">totitle</span><span class="plain-syntax"> = -1;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>The structure unicode_point is accessed in 2/spc, 2/rvl, 2/lvl, 5/dsh and here.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP3" class="paragraph-anchor"></a><b>§3. </b>Storage for these is managed on demand, in a flexibly-sized array:
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="identifier-syntax">unicode_points</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">; </span><span class="comment-syntax"> array indexed by code point</span>
|
|
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">; </span><span class="comment-syntax"> current number of entries in that array</span>
|
|
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">max_known_unicode_point</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
|
|
<span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="function-syntax">UnicodeLiterals::code_point</span><button class="popup" onclick="togglePopup('usagePopup3')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup3">Usage of <span class="code-font"><span class="function-syntax">UnicodeLiterals::code_point</span></span>:<br/><a href="3-ul.html#SP5">§5</a>, <a href="3-ul.html#SP7">§7</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">U</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">U</span><span class="plain-syntax"> < </span><span class="constant-syntax">0</span><span class="plain-syntax">) || (</span><span class="identifier-syntax">U</span><span class="plain-syntax"> >= </span><span class="constant-syntax">MAX_UNICODE_CODE_POINT</span><span class="plain-syntax">)) </span><span class="identifier-syntax">internal_error</span><span class="plain-syntax">(</span><span class="string-syntax">"Unicode point out of range"</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><a href="3-ul.html#SP4" class="function-link"><span class="function-syntax">UnicodeLiterals::ensure_data</span></a><span class="plain-syntax">();</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">U</span><span class="plain-syntax"> >= </span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">new_extent</span><span class="plain-syntax"> = </span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">new_extent</span><span class="plain-syntax"> == </span><span class="constant-syntax">0</span><span class="plain-syntax">) </span><span class="identifier-syntax">new_extent</span><span class="plain-syntax"> = </span><span class="constant-syntax">1</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">while</span><span class="plain-syntax"> (</span><span class="identifier-syntax">new_extent</span><span class="plain-syntax"> <= </span><span class="identifier-syntax">U</span><span class="plain-syntax">) </span><span class="identifier-syntax">new_extent</span><span class="plain-syntax"> = </span><span class="constant-syntax">2</span><span class="plain-syntax">*</span><span class="identifier-syntax">new_extent</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="identifier-syntax">new_unicode_points</span><span class="plain-syntax"> = (</span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *)</span>
|
|
<span class="plain-syntax"> (</span><span class="identifier-syntax">Memory::calloc</span><span class="plain-syntax">(</span><span class="identifier-syntax">new_extent</span><span class="plain-syntax">, </span><span class="reserved-syntax">sizeof</span><span class="plain-syntax">(</span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax">), </span><span class="constant-syntax">UNICODE_DATA_MREASON</span><span class="plain-syntax">));</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">i</span><span class="plain-syntax"><</span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">new_unicode_points</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">] = </span><span class="identifier-syntax">unicode_points</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">];</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">=</span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax"><</span><span class="identifier-syntax">new_extent</span><span class="plain-syntax">; </span><span class="identifier-syntax">i</span><span class="plain-syntax">++)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">new_unicode_points</span><span class="plain-syntax">[</span><span class="identifier-syntax">i</span><span class="plain-syntax">] = </span><a href="3-ul.html#SP2" class="function-link"><span class="function-syntax">UnicodeLiterals::new_code_point</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">i</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax"> > </span><span class="constant-syntax">0</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">Memory::I7_array_free</span><span class="plain-syntax">(</span><span class="identifier-syntax">unicode_points</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> </span><span class="constant-syntax">UNICODE_DATA_MREASON</span><span class="plain-syntax">, </span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax">, </span><span class="reserved-syntax">sizeof</span><span class="plain-syntax">(</span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax">));</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">unicode_points</span><span class="plain-syntax"> = </span><span class="identifier-syntax">new_unicode_points</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">unicode_points_extent</span><span class="plain-syntax"> = </span><span class="identifier-syntax">new_extent</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">U</span><span class="plain-syntax"> > </span><span class="identifier-syntax">max_known_unicode_point</span><span class="plain-syntax">) </span><span class="identifier-syntax">max_known_unicode_point</span><span class="plain-syntax"> = </span><span class="identifier-syntax">U</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> &(</span><span class="identifier-syntax">unicode_points</span><span class="plain-syntax">[</span><span class="identifier-syntax">U</span><span class="plain-syntax">]);</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP4" class="paragraph-anchor"></a><b>§4. </b>The standard Inform distribution includes the current Unicode specification's
|
|
main data file. Although parsing that file is relatively fast, we do it only
|
|
on demand, because it's not small (about 2 MB of text) and is often not needed.
|
|
</p>
|
|
|
|
<p class="commentary">The <span class="extract"><span class="extract-syntax">UnicodeData_lookup</span></span> dictionary really associates texts (names of characters)
|
|
with non-negative integers (their code points), but our <span class="extract"><span class="extract-syntax">dictionary</span></span> type only
|
|
allows texts-to-pointers, so we wrap these integers up into <span class="extract"><span class="extract-syntax">unicode_lookup_value</span></span>
|
|
structures, to which we can then hold pointers.
|
|
</p>
|
|
|
|
<p class="commentary">(As noted by David Kinder in May 2023, it's unsafe to use this dictionary to
|
|
associate texts with <span class="extract"><span class="extract-syntax">unicode_point *</span></span> values, because the flexible-sized array
|
|
holding those means that they will move around in memory. If we are lucky, the
|
|
memory freed when the old version of the array is superseded will be left intact
|
|
and then the dictionary pointers to it will all work fine: if we are not lucky,
|
|
for example if the memory environment is stressed because <span class="extract"><span class="extract-syntax">intest</span></span> is running
|
|
many simultaneous copies of Inform, then that space will be reused and the
|
|
dictionary pointers will be invalid.)
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="identifier-syntax">dictionary</span><span class="plain-syntax"> *</span><span class="identifier-syntax">UnicodeData_lookup</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">;</span>
|
|
<span class="reserved-syntax">typedef</span><span class="plain-syntax"> </span><span class="reserved-syntax">struct</span><span class="plain-syntax"> </span><span class="reserved-syntax">unicode_lookup_value</span><span class="plain-syntax"> {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">code_point</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax">} </span><span class="reserved-syntax">unicode_lookup_value</span><span class="plain-syntax">;</span>
|
|
|
|
<span class="reserved-syntax">void</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::ensure_data</span><button class="popup" onclick="togglePopup('usagePopup4')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup4">Usage of <span class="code-font"><span class="function-syntax">UnicodeLiterals::ensure_data</span></span>:<br/><a href="3-ul.html#SP3">§3</a>, <a href="3-ul.html#SP6">§6</a></span></button><span class="plain-syntax">(</span><span class="reserved-syntax">void</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">UnicodeData_lookup</span><span class="plain-syntax"> == </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">UnicodeData_lookup</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Dictionaries::new</span><span class="plain-syntax">(65536, </span><span class="identifier-syntax">FALSE</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">filename</span><span class="plain-syntax"> *</span><span class="identifier-syntax">F</span><span class="plain-syntax"> = </span><span class="identifier-syntax">InstalledFiles::filename</span><span class="plain-syntax">(</span><span class="identifier-syntax">UNICODE_DATA_IRES</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">TextFiles::read</span><span class="plain-syntax">(</span><span class="identifier-syntax">F</span><span class="plain-syntax">, </span><span class="identifier-syntax">FALSE</span><span class="plain-syntax">, </span><span class="string-syntax">"can't open UnicodeData file"</span><span class="plain-syntax">, </span><span class="identifier-syntax">TRUE</span><span class="plain-syntax">,</span>
|
|
<span class="plain-syntax"> &</span><a href="3-ul.html#SP5" class="function-link"><span class="function-syntax">UnicodeLiterals::read_line</span></a><span class="plain-syntax">, </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">, </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">LOG</span><span class="plain-syntax">(</span><span class="string-syntax">"Read Unicode data to code point 0x%06x in %f\n"</span><span class="plain-syntax">, </span><span class="identifier-syntax">max_known_unicode_point</span><span class="plain-syntax">, </span><span class="identifier-syntax">F</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>The structure unicode_lookup_value is private to this section.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP5" class="paragraph-anchor"></a><b>§5. </b>The format of this file is admirably stable. Lines look like so:
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> 0067;LATIN SMALL LETTER G;Ll;0;L;;;;;N;;;0047;;0047</span>
|
|
<span class="plain-syntax"> 1C85;CYRILLIC SMALL LETTER THREE-LEGGED TE;Ll;0;L;;;;;N;;;0422;;0422</span>
|
|
<span class="plain-syntax"> 1FAA1;SEWING NEEDLE;So;0;ON;;;;;N;;;;;</span>
|
|
</pre>
|
|
<p class="commentary">Each line corresponds to a code point. They're presented in the file in ascending
|
|
order of these values, but we make no use of that fact. Each line contains fields
|
|
divided by semicolons, and semicolon characters are illegal in any field.
|
|
</p>
|
|
|
|
<pre class="definitions code-font"><span class="definition-keyword">define</span> <span class="constant-syntax">CODE_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">0</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">NAME_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">1</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">GENERAL_CATEGORY_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">2</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">COMBINING_CLASSES_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">3</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">BIDIRECTIONAL_CATEGORY_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">4</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">DECOMPOSITION_MAPPING_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">5</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">DECIMAL_DIGIT_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">6</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">DIGIT_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">7</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">NUMERIC_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">8</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">MIRRORED_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">9</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">OLD_NAME_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">10</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">ISO_10646_COMMENT_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">11</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">UC_MAPPING_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">12</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">LC_MAPPING_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">13</span>
|
|
<span class="definition-keyword">define</span> <span class="constant-syntax">TC_MAPPING_UNICODE_DATA_FIELD</span><span class="plain-syntax"> </span><span class="constant-syntax">14</span>
|
|
</pre>
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="reserved-syntax">void</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::read_line</span><button class="popup" onclick="togglePopup('usagePopup5')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup5">Usage of <span class="code-font"><span class="function-syntax">UnicodeLiterals::read_line</span></span>:<br/><a href="3-ul.html#SP4">§4</a></span></button><span class="plain-syntax">(</span><span class="identifier-syntax">text_stream</span><span class="plain-syntax"> *</span><span class="identifier-syntax">text</span><span class="plain-syntax">, </span><span class="identifier-syntax">text_file_position</span><span class="plain-syntax"> *</span><span class="identifier-syntax">tfp</span><span class="plain-syntax">, </span><span class="reserved-syntax">void</span><span class="plain-syntax"> *</span><span class="identifier-syntax">vm</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">Str::trim_white_space</span><span class="plain-syntax">(</span><span class="identifier-syntax">text</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">wchar_t</span><span class="plain-syntax"> </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Str::get_first_char</span><span class="plain-syntax">(</span><span class="identifier-syntax">text</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">c</span><span class="plain-syntax"> == </span><span class="constant-syntax">0</span><span class="plain-syntax">) </span><span class="reserved-syntax">return</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">text_stream</span><span class="plain-syntax"> *</span><span class="identifier-syntax">name</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Str::new</span><span class="plain-syntax">();</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">TEMPORARY_TEXT</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">U</span><span class="plain-syntax">[16], </span><span class="identifier-syntax">field_number</span><span class="plain-syntax"> = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">f</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">f</span><span class="plain-syntax">&lt;16; </span><span class="identifier-syntax">f</span><span class="plain-syntax">++) </span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="identifier-syntax">f</span><span class="plain-syntax">] = </span><span class="constant-syntax">0</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="3-ul.html#SP5_1" class="named-paragraph-link"><span class="named-paragraph">Parse the fields</span><span class="named-paragraph-number">5.1</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">field_number</span><span class="plain-syntax"> &gt; </span><span class="constant-syntax">1</span><span class="plain-syntax">) &amp;&amp; (</span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">CODE_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax">] &lt; </span><span class="constant-syntax">MAX_UNICODE_CODE_POINT</span><span class="plain-syntax">)) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Cn_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="3-ul.html#SP5_2" class="named-paragraph-link"><span class="named-paragraph">Determine the category code</span><span class="named-paragraph-number">5.2</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="identifier-syntax">up</span><span class="plain-syntax"> = </span><a href="3-ul.html#SP3" class="function-link"><span class="function-syntax">UnicodeLiterals::code_point</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">CODE_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax">]);</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="3-ul.html#SP5_3" class="named-paragraph-link"><span class="named-paragraph">Initialise the unicode point structure</span><span class="named-paragraph-number">5.3</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="named-paragraph-container code-font"><a href="3-ul.html#SP5_4" class="named-paragraph-link"><span class="named-paragraph">Add to the dictionary of character names</span><span class="named-paragraph-number">5.4</span></a></span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">DISCARD_TEXT</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">)</span>
|
|
<span class="plain-syntax">}</span>
|
|
</pre>
|
|
<p class="commentary firstcommentary"><a id="SP5_1" class="paragraph-anchor"></a><b>§5.1. </b><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Parse the fields</span><span class="named-paragraph-number">5.1</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">for</span><span class="plain-syntax"> (</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">i</span><span class="plain-syntax">=0; </span><span class="identifier-syntax">i</span><span class="plain-syntax">&lt;</span><span class="identifier-syntax">Str::len</span><span class="plain-syntax">(</span><span class="identifier-syntax">text</span><span class="plain-syntax">); </span><span class="identifier-syntax">i</span><span class="plain-syntax">++) {</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">wchar_t</span><span class="plain-syntax"> </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Str::get_at</span><span class="plain-syntax">(</span><span class="identifier-syntax">text</span><span class="plain-syntax">, </span><span class="identifier-syntax">i</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">c</span><span class="plain-syntax"> == </span><span class="character-syntax">';'</span><span class="plain-syntax">) </span><span class="identifier-syntax">field_number</span><span class="plain-syntax">++;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">switch</span><span class="plain-syntax"> (</span><span class="identifier-syntax">field_number</span><span class="plain-syntax">) {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">case</span><span class="plain-syntax"> </span><span class="identifier-syntax">CODE_VALUE_UNICODE_DATA_FIELD:</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">case</span><span class="plain-syntax"> </span><span class="identifier-syntax">UC_MAPPING_UNICODE_DATA_FIELD:</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">case</span><span class="plain-syntax"> </span><span class="identifier-syntax">LC_MAPPING_UNICODE_DATA_FIELD:</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">case</span><span class="plain-syntax"> </span><span class="identifier-syntax">TC_MAPPING_UNICODE_DATA_FIELD:</span><span class="plain-syntax"> {</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">H</span><span class="plain-syntax"> = -1;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">c</span><span class="plain-syntax"> &gt;= </span><span class="character-syntax">'0'</span><span class="plain-syntax">) &amp;&amp; (</span><span class="identifier-syntax">c</span><span class="plain-syntax"> &lt;= </span><span class="character-syntax">'9'</span><span class="plain-syntax">)) </span><span class="identifier-syntax">H</span><span class="plain-syntax"> = (</span><span class="reserved-syntax">int</span><span class="plain-syntax">) (</span><span class="identifier-syntax">c</span><span class="plain-syntax"> - </span><span class="character-syntax">'0'</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> ((</span><span class="identifier-syntax">c</span><span class="plain-syntax"> &gt;= </span><span class="character-syntax">'A'</span><span class="plain-syntax">) &amp;&amp; (</span><span class="identifier-syntax">c</span><span class="plain-syntax"> &lt;= </span><span class="character-syntax">'F'</span><span class="plain-syntax">)) </span><span class="identifier-syntax">H</span><span class="plain-syntax"> = (</span><span class="reserved-syntax">int</span><span class="plain-syntax">) (</span><span class="identifier-syntax">c</span><span class="plain-syntax"> - </span><span class="character-syntax">'A'</span><span class="plain-syntax"> + </span><span class="constant-syntax">10</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">H</span><span class="plain-syntax"> >= </span><span class="constant-syntax">0</span><span class="plain-syntax">) </span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="identifier-syntax">field_number</span><span class="plain-syntax">] = </span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="identifier-syntax">field_number</span><span class="plain-syntax">]*16 + </span><span class="identifier-syntax">H</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">break</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">case</span><span class="plain-syntax"> </span><span class="identifier-syntax">NAME_UNICODE_DATA_FIELD:</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">PUT_TO</span><span class="plain-syntax">(</span><span class="identifier-syntax">name</span><span class="plain-syntax">, </span><span class="identifier-syntax">c</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">break</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">case</span><span class="plain-syntax"> </span><span class="identifier-syntax">GENERAL_CATEGORY_UNICODE_DATA_FIELD:</span>
|
|
<span class="plain-syntax"> </span><span class="identifier-syntax">PUT_TO</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">c</span><span class="plain-syntax">);</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">break</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> }</span>
|
|
<span class="plain-syntax"> }</span>
|
|
</pre>
|
|
<ul class="endnotetexts"><li>This code is used in <a href="3-ul.html#SP5">§5</a>.</li></ul>
|
|
<p class="commentary firstcommentary"><a id="SP5_2" class="paragraph-anchor"></a><b>§5.2. </b><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Determine the category code</span><span class="named-paragraph-number">5.2</span></span><span class="comment-syntax"> =</span>
|
|
</p>
|
|
|
|
<pre class="displayed-code all-displayed-code code-font">
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Cc"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Cc_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Cf"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Cf_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Cn"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Cn_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Co"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Co_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Cs"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Cs_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Ll"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Ll_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Lm"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Lm_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Lo"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Lo_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Lt"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Lt_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Lu"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Lu_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Mc"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Mc_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Me"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Me_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Mn"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Mn_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Nd"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Nd_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Nl"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Nl_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"No"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">No_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Pc"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Pc_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Pd"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Pd_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Pe"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Pe_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Pf"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Pf_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Pi"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Pi_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Po"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Po_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Ps"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Ps_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Sc"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Sc_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Sk"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Sk_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Sm"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Sm_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"So"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">So_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Zl"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Zl_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Zp"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Zp_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Str::eq</span><span class="plain-syntax">(</span><span class="identifier-syntax">category</span><span class="plain-syntax">, </span><span class="identifier-syntax">I</span><span class="string-syntax">"Zs"</span><span class="plain-syntax">)) </span><span class="identifier-syntax">c</span><span class="plain-syntax"> = </span><span class="constant-syntax">Zs_UNICODE_CAT</span><span class="plain-syntax">;</span>
|
|
<span class="plain-syntax"> </span><span class="reserved-syntax">else</span><span class="plain-syntax"> </span><span class="identifier-syntax">LOG</span><span class="plain-syntax">(</span><span class="string-syntax">"Unknown category '%S'\n"</span><span class="plain-syntax">, </span><span class="identifier-syntax">category</span><span class="plain-syntax">);</span>
|
|
</pre>
<ul class="endnotetexts"><li>This code is used in <a href="3-ul.html#SP5">§5</a>.</li></ul>
<!-- §5.3 listing: the named paragraph that fills in the fields of the unicode_point record "up" (name, category and the three case mappings) from the values held in name, c and the U[] field array. -->
<p class="commentary firstcommentary"><a id="SP5_3" class="paragraph-anchor"></a><b>§5.3. </b><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Initialise the unicode point structure</span><span class="named-paragraph-number">5.3</span></span><span class="comment-syntax"> =</span>
</p>
<pre class="displayed-code all-displayed-code code-font">
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">name</span><span class="plain-syntax"> = </span><span class="identifier-syntax">name</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">category</span><span class="plain-syntax"> = </span><span class="identifier-syntax">c</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">tolower</span><span class="plain-syntax"> = </span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">LC_MAPPING_UNICODE_DATA_FIELD</span><span class="plain-syntax">];</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">toupper</span><span class="plain-syntax"> = </span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">UC_MAPPING_UNICODE_DATA_FIELD</span><span class="plain-syntax">];</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">totitle</span><span class="plain-syntax"> = </span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">TC_MAPPING_UNICODE_DATA_FIELD</span><span class="plain-syntax">];</span>
</pre>
<ul class="endnotetexts"><li>This code is used in <a href="3-ul.html#SP5">§5</a>.</li></ul>
<p class="commentary firstcommentary"><a id="SP5_4" class="paragraph-anchor"></a><b>§5.4. </b>Control codes in Unicode, a residue of ASCII, are given no names by the
standard. For example:
</p>
<pre class="displayed-code all-displayed-code code-font">
<span class="plain-syntax"> 0004;&lt;control&gt;;Cc;0;BN;;;;;N;END OF TRANSMISSION;;;;</span>
</pre>
<p class="commentary">Indeed, at present every code with category <span class="extract"><span class="extract-syntax">Cc</span></span> has the pseudo-name <span class="extract"><span class="extract-syntax">&lt;control&gt;</span></span>.
So we will mostly not allow these to be referred to by name in Inform. (In theory we
could read the ISO-10646 comment as if it were a name: here, that would be
"END OF TRANSMISSION", which isn't too bad. But "FORM FEED (FF)" and
"CHARACTER TABULATION" are less persuasive, and anyway, we don't actually want
users to insert control characters into Inform text literals.)
</p>
<!-- §5.4 listing: the named paragraph that registers a character name in UnicodeData_lookup. It picks the lookup key "index": for category Cc only code points 9 and 10 get one ("TAB" and "NEWLINE"), and every other category uses its own name. When a key exists, a unicode_lookup_value holding the code point is stored under it.
     The dictionary calls are keyed on index rather than name. Keying on name would file the two permitted control codes under their name field (presumably the shared control pseudo-name) so that "TAB" and "NEWLINE" could never be found.
     NOTE(review): this page looks like generated weave output, so the same change presumably belongs in the originating source as well. Confirm before relying on this listing. -->
<p class="commentary"><span class="named-paragraph-container code-font"><span class="named-paragraph-defn">Add to the dictionary of character names</span><span class="named-paragraph-number">5.4</span></span><span class="comment-syntax"> =</span>
</p>
<pre class="displayed-code all-displayed-code code-font">
<span class="plain-syntax"> </span><span class="identifier-syntax">text_stream</span><span class="plain-syntax"> *</span><span class="identifier-syntax">index</span><span class="plain-syntax"> = </span><span class="identifier-syntax">NULL</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">c</span><span class="plain-syntax"> == </span><span class="constant-syntax">Cc_UNICODE_CAT</span><span class="plain-syntax">) {</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">CODE_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax">] == </span><span class="constant-syntax">9</span><span class="plain-syntax">) </span><span class="identifier-syntax">index</span><span class="plain-syntax"> = </span><span class="identifier-syntax">I</span><span class="string-syntax">"TAB"</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">CODE_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax">] == </span><span class="constant-syntax">10</span><span class="plain-syntax">) </span><span class="identifier-syntax">index</span><span class="plain-syntax"> = </span><span class="identifier-syntax">I</span><span class="string-syntax">"NEWLINE"</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> } </span><span class="reserved-syntax">else</span><span class="plain-syntax"> {</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">index</span><span class="plain-syntax"> = </span><span class="identifier-syntax">name</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> }</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">index</span><span class="plain-syntax">) {</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">Dictionaries::create</span><span class="plain-syntax">(</span><span class="identifier-syntax">UnicodeData_lookup</span><span class="plain-syntax">, </span><span class="identifier-syntax">index</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_lookup_value</span><span class="plain-syntax"> *</span><span class="identifier-syntax">ulv</span><span class="plain-syntax"> = </span><span class="identifier-syntax">CREATE</span><span class="plain-syntax">(</span><span class="reserved-syntax">unicode_lookup_value</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">ulv</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">code_point</span><span class="plain-syntax"> = </span><span class="identifier-syntax">U</span><span class="plain-syntax">[</span><span class="constant-syntax">CODE_VALUE_UNICODE_DATA_FIELD</span><span class="plain-syntax">];</span>
<span class="plain-syntax"> </span><span class="identifier-syntax">Dictionaries::write_value</span><span class="plain-syntax">(</span><span class="identifier-syntax">UnicodeData_lookup</span><span class="plain-syntax">, </span><span class="identifier-syntax">index</span><span class="plain-syntax">, (</span><span class="reserved-syntax">void</span><span class="plain-syntax"> *) </span><span class="identifier-syntax">ulv</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> }</span>
</pre>
<ul class="endnotetexts"><li>This code is used in <a href="3-ul.html#SP5">§5</a>.</li></ul>
<p class="commentary firstcommentary"><a id="SP6" class="paragraph-anchor"></a><b>§6. Using the Unicode data. </b>The first lookup here is slow, since it requires us to parse the Unicode
specification data file. But after that everything runs quite swiftly.
</p>
<!-- §6 listing: UnicodeLiterals::parse(N) first calls UnicodeLiterals::ensure_data, then looks N up in UnicodeData_lookup. It returns the stored code point when N is found, and -1 otherwise. The "?" button toggles the usage popup through togglePopup. -->
<pre class="displayed-code all-displayed-code code-font">
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::parse</span><button class="popup" type="button" onclick="togglePopup('usagePopup6')"><span class="comment-syntax">?</span><span class="popuptext" id="usagePopup6">Usage of <span class="code-font"><span class="function-syntax">UnicodeLiterals::parse</span></span>:<br><a href="3-ul.html#SP1">§1</a></span></button><span class="plain-syntax">(</span><span class="identifier-syntax">text_stream</span><span class="plain-syntax"> *</span><span class="identifier-syntax">N</span><span class="plain-syntax">) {</span>
<span class="plain-syntax"> </span><a href="3-ul.html#SP4" class="function-link"><span class="function-syntax">UnicodeLiterals::ensure_data</span></a><span class="plain-syntax">();</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">Dictionaries::find</span><span class="plain-syntax">(</span><span class="identifier-syntax">UnicodeData_lookup</span><span class="plain-syntax">, </span><span class="identifier-syntax">N</span><span class="plain-syntax">)) {</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_lookup_value</span><span class="plain-syntax"> *</span><span class="identifier-syntax">ulv</span><span class="plain-syntax"> = </span><span class="identifier-syntax">Dictionaries::read_value</span><span class="plain-syntax">(</span><span class="identifier-syntax">UnicodeData_lookup</span><span class="plain-syntax">, </span><span class="identifier-syntax">N</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">ulv</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">code_point</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> }</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> -1;</span>
<span class="plain-syntax">}</span>
</pre>
<p class="commentary firstcommentary"><a id="SP7" class="paragraph-anchor"></a><b>§7. </b>We won't go too far down the Unicode rabbit-hole, but here are functions which
may some day be useful:
</p>
<!-- §7 listing: UnicodeLiterals::tolower, ::toupper and ::totitle each fetch the unicode_point for C through UnicodeLiterals::code_point and return its stored mapping when that value is non-negative, otherwise C itself. UnicodeLiterals::category returns the point's stored category. -->
<pre class="displayed-code all-displayed-code code-font">
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::tolower</span><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">) {</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="identifier-syntax">up</span><span class="plain-syntax"> = </span><a href="3-ul.html#SP3" class="function-link"><span class="function-syntax">UnicodeLiterals::code_point</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">C</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">D</span><span class="plain-syntax"> = </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">tolower</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">D</span><span class="plain-syntax"> &gt;= </span><span class="constant-syntax">0</span><span class="plain-syntax">) </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">D</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">;</span>
<span class="plain-syntax">}</span>
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::toupper</span><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">) {</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="identifier-syntax">up</span><span class="plain-syntax"> = </span><a href="3-ul.html#SP3" class="function-link"><span class="function-syntax">UnicodeLiterals::code_point</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">C</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">D</span><span class="plain-syntax"> = </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">toupper</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">D</span><span class="plain-syntax"> &gt;= </span><span class="constant-syntax">0</span><span class="plain-syntax">) </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">D</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">;</span>
<span class="plain-syntax">}</span>
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::totitle</span><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">) {</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="identifier-syntax">up</span><span class="plain-syntax"> = </span><a href="3-ul.html#SP3" class="function-link"><span class="function-syntax">UnicodeLiterals::code_point</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">C</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">D</span><span class="plain-syntax"> = </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">totitle</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">if</span><span class="plain-syntax"> (</span><span class="identifier-syntax">D</span><span class="plain-syntax"> &gt;= </span><span class="constant-syntax">0</span><span class="plain-syntax">) </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">D</span><span class="plain-syntax">;</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">;</span>
<span class="plain-syntax">}</span>
<span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="function-syntax">UnicodeLiterals::category</span><span class="plain-syntax">(</span><span class="reserved-syntax">int</span><span class="plain-syntax"> </span><span class="identifier-syntax">C</span><span class="plain-syntax">) {</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">unicode_point</span><span class="plain-syntax"> *</span><span class="identifier-syntax">up</span><span class="plain-syntax"> = </span><a href="3-ul.html#SP3" class="function-link"><span class="function-syntax">UnicodeLiterals::code_point</span></a><span class="plain-syntax">(</span><span class="identifier-syntax">C</span><span class="plain-syntax">);</span>
<span class="plain-syntax"> </span><span class="reserved-syntax">return</span><span class="plain-syntax"> </span><span class="identifier-syntax">up</span><span class="plain-syntax">-&gt;</span><span class="element-syntax">category</span><span class="plain-syntax">;</span>
<span class="plain-syntax">}</span>
</pre>
<!-- Section progress bar: links to the previous and next sections, to each chapter, and to each section of the current chapter (3), with the current section "ul" shown as plain text. The arrow-only links carry aria-label text so they have a readable accessible name.
     NOTE(review): "progress" is not a defined WAI-ARIA role. It is kept in case the stylesheets select on it; confirm against Progress.css before removing it. -->
<nav role="progress" aria-label="Section progress"><div class="progresscontainer">
<ul class="progressbar"><li class="progressprev"><a href="3-tod.html" aria-label="Previous section">❮</a></li><li class="progresschapter"><a href="P-wtmd.html">P</a></li><li class="progresschapter"><a href="1-vm.html">1</a></li><li class="progresschapter"><a href="2-spc.html">2</a></li><li class="progresscurrentchapter">3</li><li class="progresssection"><a href="3-pl.html">pl</a></li><li class="progresssection"><a href="3-lp.html">lp</a></li><li class="progresssection"><a href="3-lrn.html">lrn</a></li><li class="progresssection"><a href="3-tod.html">tod</a></li><li class="progresscurrent">ul</li><li class="progresssection"><a href="3-tt.html">tt</a></li><li class="progresssection"><a href="3-ll.html">ll</a></li><li class="progresschapter"><a href="4-ets.html">4</a></li><li class="progresschapter"><a href="5-dsh.html">5</a></li><li class="progressnext"><a href="3-tt.html" aria-label="Next section">❯</a></li></ul></div>
</nav><!--End of weave-->
</main>
</body>
</html>