2021-07-05 01:04:18 +03:00
|
|
|
[IndexLexicon::] Lexicon.
|
2021-06-11 01:22:43 +03:00
|
|
|
|
|
|
|
A lexicon for nouns, adjectives and verbs found in an Inter tree.
|
|
|
|
|
|
|
|
@ The lexicon is the part of the Index which gives an alphabetised list of
|
|
|
|
adjectives, nouns, verbs and other words which can be used in descriptions
|
|
|
|
of things: it's the nearest thing to an index of the meanings inside Inform.
|
|
|
|
This is in one sense quite an elaborate indexing mechanism, since it brings
|
|
|
|
together meanings relating to various different Inform structures under a single
|
|
|
|
umbrella, the "lexicon entry" structure:
|
|
|
|
|
2021-06-12 15:08:47 +03:00
|
|
|
@d COMMON_NOUN_TLEXE 1 /* the name of a kind */
@d PROPER_NOUN_TLEXE 2 /* the name of an instance of "object" */
@d ADJECTIVAL_PHRASE_TLEXE 3 /* an adjective, as created by a "Definition:" */
@d ENUMERATED_CONSTANT_TLEXE 4 /* an instance which is not an object: e.g., "green" if colour is a kind of value and green a colour */
@d VERB_TLEXE 5 /* an ordinary, meaningful verb */
@d PREP_TLEXE 7 /* a prepositional, "to be upon..." sort of verb */
@d MVERB_TLEXE 9 /* a verb with no meaning attached */
|
|
|
|
|
|
|
|
@ Each entry is keyed by its lemma, that is, the word or phrase being
indexed, which is held as a text stream.
|
|
|
|
|
|
|
|
=
|
|
|
|
typedef struct index_tlexicon_entry {
|
2021-06-12 01:33:59 +03:00
|
|
|
struct text_stream *lemma;
|
2021-06-11 01:22:43 +03:00
|
|
|
int part_of_speech; /* one of those above */
|
|
|
|
char *category; /* textual description of said, e.g., |"adjective"| */
|
|
|
|
struct general_pointer entry_refers_to; /* depending on which part of speech */
|
|
|
|
struct parse_node *verb_defined_at; /* sentence where defined (verbs only) */
|
|
|
|
char *gloss_note; /* gloss on the definition, or |NULL| if none is provided */
|
2021-06-12 01:33:59 +03:00
|
|
|
struct inter_package *lex_package;
|
2021-06-12 12:42:20 +03:00
|
|
|
int link_to; /* word number in source text */
|
2021-06-11 01:22:43 +03:00
|
|
|
struct text_stream *reduced_to_lower_case; /* text converted to lower case for sorting */
|
|
|
|
struct index_tlexicon_entry *sorted_next; /* next in lexicographic order */
|
|
|
|
CLASS_DEFINITION
|
|
|
|
} index_tlexicon_entry;
|
|
|
|
|
|
|
|
@
|
|
|
|
|
|
|
|
= (early code)
|
|
|
|
index_tlexicon_entry *sorted_tlexicon = NULL; /* first entry in lexicographic order, or |NULL| if nothing is sorted yet */
|
|
|
|
|
|
|
|
@ Lexicon entries are created by the following routine:
|
|
|
|
|
|
|
|
=
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *IndexLexicon::lexicon_new_entry(text_stream *lemma, int part) {
	index_tlexicon_entry *entry = CREATE(index_tlexicon_entry);

	/* what the entry is: it holds the result of |Str::duplicate|, not |lemma| itself */
	entry->part_of_speech = part;
	entry->lemma = Str::duplicate(lemma);

	/* the sort key starts out empty, and is filled in when the lexicon is stocked */
	entry->reduced_to_lower_case = Str::new();

	/* everything else is blank until the caller chooses to set it */
	entry->entry_refers_to = NULL_GENERAL_POINTER;
	entry->verb_defined_at = NULL;
	entry->category = NULL;
	entry->gloss_note = NULL;
	entry->lex_package = NULL;
	entry->link_to = 0;
	return entry;
}
|
|
|
|
|
2021-06-12 01:33:59 +03:00
|
|
|
@
|
2021-06-11 01:22:43 +03:00
|
|
|
|
|
|
|
=
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *IndexLexicon::new_entry_with_details(text_stream *lemma, int pos,
	char *category, char *gloss) {
	/* This creates an entry for |lemma| with part of speech |pos|, and also
	sets its textual |category| and its |gloss|, either of which may be |NULL|.
	|IndexLexicon::lexicon_new_entry| has already stored a duplicate of |lemma|
	in the entry, so we must not overwrite that with the caller's own pointer:
	doing so would throw away the copy just made, and would leave the entry
	sharing a stream which the caller may go on to alter. */
	index_tlexicon_entry *lex = IndexLexicon::lexicon_new_entry(lemma, pos);
	lex->category = category;
	lex->gloss_note = gloss;
	return lex;
}
|
|
|
|
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *IndexLexicon::new_main_verb(text_stream *infinitive, int part,
	inter_package *pack) {
	/* The entry is created with no lemma, and is then pointed directly at
	the |infinitive| text supplied by the caller. */
	index_tlexicon_entry *verb_entry = IndexLexicon::lexicon_new_entry(NULL, part);
	verb_entry->lex_package = pack;
	verb_entry->category = "verb";
	verb_entry->lemma = infinitive;
	/* The assignment of |current_sentence| to |verb_defined_at| is disabled
	here, so that field keeps its initial value of |NULL|. */
	return verb_entry;
}
|
|
|
|
|
|
|
|
@h Printing the lexicon out in HTML format.
|
|
|
|
|
|
|
|
@ Now for the bulk of the work. Entries appear in CSS paragraphs with hanging
|
|
|
|
indentation and no interparagraph spacing, so we need to insert regular
|
|
|
|
paragraphs between the As and the Bs, then between the Bs and the Cs, and so
|
|
|
|
on. Each entry consists of the wording, then maybe some icons, then an
|
|
|
|
explanation of what it is: for instance,
|
|
|
|
|
|
|
|
>> player's holdall [icon]\quad {\it noun, a kind of} container
|
|
|
|
|
|
|
|
In a few cases, there is a further textual gloss to add.
|
|
|
|
|
2021-06-12 12:42:20 +03:00
|
|
|
=
|
2021-07-05 01:04:18 +03:00
|
|
|
void IndexLexicon::listing(OUTPUT_STREAM, int proper_nouns_only) {
	index_tlexicon_entry *lex;
	int verb_count = 0, proper_noun_count = 0, c = 0;
	wchar_t current_initial_letter = '?';

	/* count the proper nouns first, so that we know where the halfway point falls */
	lex = sorted_tlexicon;
	while (lex) {
		if (lex->part_of_speech == PROPER_NOUN_TLEXE) proper_noun_count++;
		lex = lex->sorted_next;
	}

	/* proper nouns are set out in a table of two columns */
	if (proper_nouns_only) {
		HTML::begin_html_table(OUT, NULL, TRUE, 0, 0, 0, 0, 0);
		HTML::first_html_column(OUT, 0);
	}

	for (lex = sorted_tlexicon; lex; lex = lex->sorted_next) {
		/* list either the proper nouns alone, or everything except them */
		int is_proper_noun = (lex->part_of_speech == PROPER_NOUN_TLEXE);
		if ((proper_nouns_only) ? (!is_proper_noun) : (is_proper_noun)) continue;

		/* |c| counts the entries listed so far: move across at the halfway point */
		if ((proper_nouns_only) && (c == proper_noun_count/2))
			HTML::next_html_column(OUT, 0);

		/* an empty paragraph separates one initial letter from the next */
		if (current_initial_letter != Str::get_first_char(lex->reduced_to_lower_case)) {
			if (c > 0) {
				HTML_OPEN("p");
				HTML_CLOSE("p");
			}
			current_initial_letter = Str::get_first_char(lex->reduced_to_lower_case);
		}
		c++;

		HTML_OPEN_WITH("p", "class=\"hang\"");
		@<Text of the actual lexicon entry@>;
		@<Icon with link to documentation, source or verb table, if any@>;

		/* verbs of all sorts have no definition text of their own */
		switch (lex->part_of_speech) {
			case COMMON_NOUN_TLEXE:
				@<Definition of common noun entry@>; break;
			case PROPER_NOUN_TLEXE:
				@<Definition of proper noun entry@>; break;
			case ADJECTIVAL_PHRASE_TLEXE:
				@<Definition of adjectival phrase entry@>; break;
			case ENUMERATED_CONSTANT_TLEXE:
				@<Definition of enumerated instance entry@>; break;
		}

		if (lex->gloss_note) WRITE(" <i>%s</i>", lex->gloss_note);
		HTML_CLOSE("p");
	}

	if (proper_nouns_only) {
		HTML::end_html_row(OUT);
		HTML::end_html_table(OUT);
	}
}
|
2021-06-11 01:22:43 +03:00
|
|
|
|
|
|
|
@ In traditional dictionary fashion, we present the text in what may not be
|
|
|
|
the most normal ordering, in order to place the alphabetically important
|
|
|
|
part first: thus "see, to be able to" rather than "to be able to see".
|
|
|
|
(Compare "Gallifreyan High Council, continual incidences of madness and
|
|
|
|
treachery amongst the" in "Doctor Who: The Completely Useless
|
|
|
|
Encyclopaedia", eds. Howarth and Lyons (1996).)
|
|
|
|
|
|
|
|
@<Text of the actual lexicon entry@> =
|
2021-06-12 12:42:20 +03:00
|
|
|
WRITE("%S", lex->lemma);
|
2021-06-11 01:22:43 +03:00
|
|
|
if (lex->part_of_speech == PREP_TLEXE) WRITE(", to be");
|
|
|
|
|
|
|
|
@ Main lexicon entries to do with verbs link further down the index page
|
|
|
|
to the corresponding entries in the verb table. We want to use numbered
|
|
|
|
anchors for these links, but we want to avoid colliding with numbered
|
|
|
|
anchors already used for other purposes higher up on the Phrasebook index
|
|
|
|
page. So we use a set of anchors numbered 10000 and up, which is guaranteed
|
|
|
|
not to coincide with any of those.
|
|
|
|
|
|
|
|
We omit source links to an adjectival phrase because these are polymorphic,
|
|
|
|
that is, the phrase may have multiple definitions in different parts of the
|
|
|
|
source text: so any single link would be potentially misleading.
|
|
|
|
|
|
|
|
@<Icon with link to documentation, source or verb table, if any@> =
|
|
|
|
switch(lex->part_of_speech) {
|
2021-06-12 15:08:47 +03:00
|
|
|
case COMMON_NOUN_TLEXE: {
|
|
|
|
text_stream *ref = Metadata::read_optional_textual(lex->lex_package, I"^documentation");
|
2021-07-09 00:56:01 +03:00
|
|
|
if (Str::len(ref) > 0) IndexUtilities::DocReferences::link(OUT, ref);
|
2021-06-11 01:22:43 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case VERB_TLEXE:
|
|
|
|
case PREP_TLEXE:
|
2021-07-09 00:56:01 +03:00
|
|
|
IndexUtilities::below_link_numbered(OUT, 10000+verb_count++);
|
2021-06-11 01:22:43 +03:00
|
|
|
break;
|
|
|
|
}
|
2021-06-12 12:42:20 +03:00
|
|
|
if ((lex->part_of_speech != ADJECTIVAL_PHRASE_TLEXE) && (lex->link_to > 0))
|
2021-07-09 00:56:01 +03:00
|
|
|
IndexUtilities::link(OUT, lex->link_to);
|
2021-06-11 01:22:43 +03:00
|
|
|
|
2021-06-12 15:08:47 +03:00
|
|
|
@<Definition of common noun entry@> =
|
|
|
|
@<Begin definition text@>;
|
|
|
|
WRITE(", a kind of ");
|
|
|
|
text_stream *superk = Metadata::read_optional_textual(lex->lex_package, I"^index_superkind");
|
|
|
|
if (Str::len(superk) > 0) WRITE("%S", superk);
|
|
|
|
@<End definition text@>;
|
2021-06-11 01:22:43 +03:00
|
|
|
|
|
|
|
@ Simply the name of an instance.
|
|
|
|
|
|
|
|
@<Definition of proper noun entry@> =
|
2021-06-12 12:42:20 +03:00
|
|
|
@<Begin definition text@>;
|
|
|
|
WRITE("%S", Metadata::read_textual(lex->lex_package, I"^index_kind"));
|
|
|
|
@<End definition text@>;
|
2021-06-11 01:22:43 +03:00
|
|
|
|
|
|
|
@ As mentioned above, an adjectival phrase can be multiply defined in
|
|
|
|
different contexts. We want to quote all of those.
|
|
|
|
|
|
|
|
@<Definition of adjectival phrase entry@> =
|
|
|
|
@<Begin definition text@>;
|
2021-06-12 15:08:47 +03:00
|
|
|
WRITE(": %S", Metadata::read_textual(lex->lex_package, I"^index_entry"));
|
2021-06-11 01:22:43 +03:00
|
|
|
@<End definition text@>;
|
|
|
|
|
|
|
|
@<Definition of enumerated instance entry@> =
|
|
|
|
@<Begin definition text@>;
|
2021-06-12 15:08:47 +03:00
|
|
|
WRITE(", value of %S", Metadata::read_textual(lex->lex_package, I"^index_kind"));
|
2021-06-11 01:22:43 +03:00
|
|
|
@<End definition text@>;
|
|
|
|
|
|
|
|
@<Begin definition text@> =
|
|
|
|
WRITE(" ... <i>");
|
2021-06-12 12:42:20 +03:00
|
|
|
if ((proper_nouns_only == FALSE) && (lex->category))
|
2021-06-11 01:22:43 +03:00
|
|
|
WRITE("%s", lex->category);
|
|
|
|
|
|
|
|
@<End definition text@> =
	/* close the italics opened when the definition text began */
	WRITE("</i>");
|
|
|
|
|
|
|
|
@h Index tabulation.
|
|
|
|
The following stocks the lexicon with entries for the verbs, prepositions, adjectives, kinds and instances found in an Inter tree, and then sorts them.
|
|
|
|
|
|
|
|
=
|
2021-06-12 01:33:59 +03:00
|
|
|
inter_tree *tree_stored_by_lexicon = NULL;
|
2021-07-05 01:04:18 +03:00
|
|
|
void IndexLexicon::stock(inter_tree *I) {
|
2021-06-12 01:33:59 +03:00
|
|
|
if (I == tree_stored_by_lexicon) return;
|
|
|
|
tree_stored_by_lexicon = I;
|
|
|
|
tree_inventory *inv = Synoptic::inv(I);
|
|
|
|
TreeLists::sort(inv->verb_nodes, Synoptic::module_order);
|
|
|
|
for (int i=0; i<TreeLists::len(inv->verb_nodes); i++) {
|
|
|
|
inter_package *pack = Inter::Package::defined_by_frame(inv->verb_nodes->list[i].node);
|
2021-06-12 12:42:20 +03:00
|
|
|
index_tlexicon_entry *lex;
|
2021-06-12 01:33:59 +03:00
|
|
|
if (Metadata::read_numeric(pack, I"^meaningless"))
|
2021-07-05 01:04:18 +03:00
|
|
|
lex = IndexLexicon::new_main_verb(Metadata::read_textual(pack, I"^infinitive"), MVERB_TLEXE, pack);
|
2021-06-12 01:33:59 +03:00
|
|
|
else
|
2021-07-05 01:04:18 +03:00
|
|
|
lex = IndexLexicon::new_main_verb(Metadata::read_textual(pack, I"^infinitive"), VERB_TLEXE, pack);
|
2021-06-12 12:42:20 +03:00
|
|
|
lex->link_to = (int) Metadata::read_numeric(pack, I"^at");
|
2021-06-12 01:33:59 +03:00
|
|
|
}
|
2021-06-12 11:45:06 +03:00
|
|
|
for (int i=0; i<TreeLists::len(inv->preposition_nodes); i++) {
|
|
|
|
inter_package *pack = Inter::Package::defined_by_frame(inv->preposition_nodes->list[i].node);
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *lex = IndexLexicon::new_main_verb(Metadata::read_textual(pack, I"^text"), PREP_TLEXE, pack);
|
2021-06-12 12:42:20 +03:00
|
|
|
lex->link_to = (int) Metadata::read_numeric(pack, I"^at");
|
|
|
|
}
|
2021-06-12 15:08:47 +03:00
|
|
|
for (int i=0; i<TreeLists::len(inv->adjective_nodes); i++) {
|
|
|
|
inter_package *pack = Inter::Package::defined_by_frame(inv->adjective_nodes->list[i].node);
|
|
|
|
text_stream *lemma = Metadata::read_textual(pack, I"^text");
|
|
|
|
if (Str::len(lemma) > 0) {
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *lex = IndexLexicon::lexicon_new_entry(lemma, ADJECTIVAL_PHRASE_TLEXE);
|
2021-06-12 15:08:47 +03:00
|
|
|
lex->category = "adjective";
|
|
|
|
lex->lex_package = pack;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (int i=0; i<TreeLists::len(inv->kind_nodes); i++) {
|
|
|
|
inter_package *pack = Inter::Package::defined_by_frame(inv->kind_nodes->list[i].node);
|
|
|
|
if ((Metadata::read_optional_numeric(pack, I"^is_base")) &&
|
|
|
|
(Metadata::read_optional_numeric(pack, I"^is_subkind_of_object"))) {
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *lex = IndexLexicon::lexicon_new_entry(Metadata::read_textual(pack, I"^name"), COMMON_NOUN_TLEXE);
|
2021-06-12 15:08:47 +03:00
|
|
|
lex->link_to = (int) Metadata::read_numeric(pack, I"^at");
|
|
|
|
lex->category = "noun";
|
|
|
|
lex->lex_package = pack;
|
|
|
|
}
|
|
|
|
}
|
2021-06-12 12:42:20 +03:00
|
|
|
for (int i=0; i<TreeLists::len(inv->instance_nodes); i++) {
|
|
|
|
inter_package *pack = Inter::Package::defined_by_frame(inv->instance_nodes->list[i].node);
|
|
|
|
if (Metadata::read_optional_numeric(pack, I"^is_object")) {
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *lex = IndexLexicon::lexicon_new_entry(Metadata::read_textual(pack, I"^name"), PROPER_NOUN_TLEXE);
|
2021-06-12 15:08:47 +03:00
|
|
|
lex->link_to = (int) Metadata::read_numeric(pack, I"^at");
|
|
|
|
lex->category = "noun";
|
|
|
|
lex->lex_package = pack;
|
|
|
|
} else {
|
2021-07-05 01:04:18 +03:00
|
|
|
index_tlexicon_entry *lex = IndexLexicon::lexicon_new_entry(Metadata::read_textual(pack, I"^name"), ENUMERATED_CONSTANT_TLEXE);
|
2021-06-12 12:42:20 +03:00
|
|
|
lex->link_to = (int) Metadata::read_numeric(pack, I"^at");
|
|
|
|
lex->category = "noun";
|
|
|
|
lex->lex_package = pack;
|
|
|
|
}
|
2021-06-12 11:45:06 +03:00
|
|
|
}
|
2021-06-12 01:33:59 +03:00
|
|
|
@<Create lower-case forms of all lexicon entries dash@>;
|
|
|
|
@<Sort the lexicon into alphabetical order dash@>;
|
|
|
|
}
|
|
|
|
|
|
|
|
@ Before we can sort the lexicon, we need to turn its disparate forms of name
|
|
|
|
into a single, canonical, lower-case representation.
|
|
|
|
|
|
|
|
@<Create lower-case forms of all lexicon entries dash@> =
|
|
|
|
index_tlexicon_entry *lex;
|
|
|
|
LOOP_OVER(lex, index_tlexicon_entry) {
|
|
|
|
Str::copy(lex->reduced_to_lower_case, lex->lemma);
|
|
|
|
LOOP_THROUGH_TEXT(pos, lex->reduced_to_lower_case)
|
|
|
|
Str::put(pos, Characters::tolower(Str::get(pos)));
|
|
|
|
}
|
|
|
|
|
|
|
|
@ The lexicon is sorted by insertion sort, which is not ideally fast, but
|
|
|
|
which is convenient when dealing with linked lists: there are unlikely to be
|
|
|
|
more than 1000 or so entries, so the speed penalty for insertion rather
|
|
|
|
than (say) quicksort is not great.
|
|
|
|
|
|
|
|
@<Sort the lexicon into alphabetical order dash@> =
|
|
|
|
index_tlexicon_entry *lex;
|
|
|
|
LOOP_OVER(lex, index_tlexicon_entry) {
|
|
|
|
index_tlexicon_entry *lex2, *last_lex;
|
|
|
|
if (sorted_tlexicon == NULL) {
|
|
|
|
sorted_tlexicon = lex; lex->sorted_next = NULL; continue;
|
|
|
|
}
|
|
|
|
for (last_lex = NULL, lex2 = sorted_tlexicon; lex2;
|
|
|
|
last_lex = lex2, lex2 = lex2->sorted_next)
|
|
|
|
if (Str::cmp(lex->reduced_to_lower_case, lex2->reduced_to_lower_case) < 0) {
|
|
|
|
if (last_lex == NULL) sorted_tlexicon = lex;
|
|
|
|
else last_lex->sorted_next = lex;
|
|
|
|
lex->sorted_next = lex2; goto Inserted;
|
|
|
|
}
|
|
|
|
last_lex->sorted_next = lex; lex->sorted_next = NULL;
|
|
|
|
Inserted: ;
|
|
|
|
}
|