1
0
Fork 0
mirror of https://github.com/ganelson/inform.git synced 2024-07-05 08:34:22 +03:00
inform7/inbuild/inbuild-module/Chapter 5/Source Text.w

225 lines
7.8 KiB
OpenEdge ABL
Raw Normal View History

[SourceText::] Source Text.
Code for reading Inform 7 source text, which Inbuild uses for both extensions
and projects.
2020-02-17 11:43:20 +02:00
@ This short function is a bridge to the lexer, and is used for reading
text files of source into either projects or extensions. Note that it
doesn't attach the fed text to the copy: the copy may need to contain text
from multiple files and indeed from elsewhere.
=
2020-02-18 01:50:21 +02:00
/* The copy which |SourceText::read_file| is currently feeding into the lexer, or
|NULL| when no such read is under way; |SourceText::lexer_problem_handler|
attaches lexer errors to this copy when it is non-null. */
inbuild_copy *currently_lexing_into = NULL;
2020-02-17 11:43:20 +02:00
source_file *SourceText::read_file(inbuild_copy *C, filename *F, text_stream *synopsis,
	int documentation_only, int primary) {
	/* Reads the file |F| into the lexer on behalf of the copy |C|, returning the
	resulting |source_file|, or |NULL| if the file could not be opened or fed.
	|synopsis| is the description used in the console message; that message is
	suppressed when |documentation_only| is set, and is worded differently when
	|primary| is set. Note that if the file cannot be opened at all, |NULL| is
	returned without any error being attached to |C| here. */

	/* While the feed runs, lexer problems are routed to |C| via this global. */
	currently_lexing_into = C;
	general_pointer ref = STORE_POINTER_inbuild_copy(NULL);
	FILE *handle = Filenames::fopen(F, "r");
	source_file *sf = NULL;
	if (handle) {
		text_stream *leaf = Filenames::get_leafname(F);
		if (primary) leaf = I"main source text";
		sf = TextFromFiles::feed_open_file_into_lexer(F, handle,
			leaf, documentation_only, ref);
		/* We opened the handle, so we close it on both outcomes: it used to be
		closed only on success, and so leaked whenever the feed returned |NULL|. */
		fclose(handle);
		if (sf == NULL) {
			Copies::attach(C, Copies::new_error_on_file(OPEN_FAILED_CE, F));
		} else if (documentation_only == FALSE) {
			/* The paragraph below reads |sf|, |primary| and |synopsis| by name. */
			@<Tell console output about the file@>;
		}
	}
	currently_lexing_into = NULL;
	return sf;
}
@ This is where messages like
|I've also read Standard Rules by Graham Nelson, which is 27204 words long.|
are printed to |stdout| (not |stderr|), in something of an affectionate nod
2020-02-17 11:43:20 +02:00
to TeX's traditional console output, though occasionally I think silence is
golden and that these messages could go. It's a moot point for almost all users,
though, because the console output is concealed from them by the Inform
application.
@<Tell console output about the file@> =
int wc;
char *message;
if (primary) message = "I've now read %S, which is %d words long.\n";
else message = "I've also read %S, which is %d words long.\n";
wc = TextFromFiles::total_word_count(sf);
WRITE_TO(STDOUT, message, synopsis, wc);
STREAM_FLUSH(STDOUT);
LOG(message, synopsis, wc);
2020-02-18 01:50:21 +02:00
@
2020-03-03 13:02:46 +02:00
@d SENTENCE_COUNT_MONITOR SourceText::increase_sentence_count
=
/* Any wording lying within this range makes the monitor below answer |FALSE|. */
wording options_file_wording = EMPTY_WORDING_INIT;
int SourceText::increase_sentence_count(wording W) {
	/* Answers |FALSE| for wording inside |options_file_wording|, and |TRUE| for
	everything else. (Presumably the answer decides whether the sentence at |W|
	is counted: the caller is not visible here.) */
	if (Wordings::within(W, options_file_wording)) return FALSE;
	return TRUE;
}
@
2020-02-18 01:50:21 +02:00
@d LEXER_PROBLEM_HANDLER SourceText::lexer_problem_handler
=
void SourceText::lexer_problem_handler(int err, text_stream *desc, wchar_t *word) {
	/* Receives a lexer error code |err|, together with a description |desc| and
	the offending |word|, composes a one-line message for it, and records it as a
	|LEXER_CE| copy error on whichever copy is currently being lexed into. If no
	copy is being lexed into, the message is composed but then thrown away. */

	/* Running out of memory is reported as a fatal error rather than recorded. */
	if (err == MEMORY_OUT_LEXERERROR)
		Errors::fatal("Out of memory: unable to create lexer workspace");

	TEMPORARY_TEXT(erm);
	/* The "too long" errors quote the word; the "never ends" errors quote |desc|. */
	if (err == STRING_TOO_LONG_LEXERERROR) {
		WRITE_TO(erm, "Too much text in quotation marks: %w", word);
	} else if (err == WORD_TOO_LONG_LEXERERROR) {
		WRITE_TO(erm, "Word too long: %w", word);
	} else if (err == I6_TOO_LONG_LEXERERROR) {
		WRITE_TO(erm, "I6 inclusion too long: %w", word);
	} else if (err == STRING_NEVER_ENDS_LEXERERROR) {
		WRITE_TO(erm, "Quoted text never ends: %S", desc);
	} else if (err == COMMENT_NEVER_ENDS_LEXERERROR) {
		WRITE_TO(erm, "Square-bracketed text never ends: %S", desc);
	} else if (err == I6_NEVER_ENDS_LEXERERROR) {
		WRITE_TO(erm, "I6 inclusion text never ends: %S", desc);
	} else {
		internal_error("unknown lexer error");
	}

	if (currently_lexing_into != NULL) {
		copy_error *problem = Copies::new_error(LEXER_CE, erm);
		problem->error_subcategory = err;
		problem->details = Str::duplicate(desc);
		problem->word = word;
		Copies::attach(currently_lexing_into, problem);
	}
	DISCARD_TEXT(erm);
}
2020-03-03 02:59:42 +02:00
2020-03-03 13:02:46 +02:00
@
@d EXTENSION_FILE_TYPE inbuild_copy
@
@d SYNTAX_PROBLEM_HANDLER SourceText::syntax_problem_handler
=
void SourceText::syntax_problem_handler(int err_no, wording W, void *ref, int k) {
	/* Records a syntax error as a |SYNTAX_CE| copy error: |err_no| becomes its
	subcategory, |W| and |k| its details, and it is attached to the copy which
	|ref| points to (|ref| is cast to |inbuild_copy *| without any check). */
	copy_error *problem = Copies::new_error(SYNTAX_CE, NULL);
	problem->error_subcategory = err_no;
	problem->details_W = W;
	problem->details_N = k;
	Copies::attach((inbuild_copy *) ref, problem);
}
2020-03-03 02:59:42 +02:00
@ Sentences in the source text are of five categories: dividing sentences,
which divide up the source into segments; structural sentences, which split
the source into different forms (standard text, tables, equations, I6 matter,
and so on); nonstructural sentences, which make grammatical definitions and
give Inform other more or less direct instructions; rule declarations; and
regular sentences, those which use the standard verbs. Examples:
>> Volume II [dividing]
>> Include Locksmith by Emily Short [structural]
>> Release along with a website [nonstructural]
>> Instead of looking [rule]
>> The cushion is on the wooden chair [regular]
Dividing sentences are always read, whereas the others may be skipped in
sections of source not being included for one reason or another. Dividing
sentences must match the following. Note that the extension end markers are
only read in extensions, so they can never accidentally match in the main
source text.
@e ExtMultipleBeginsHere_SYNERROR
@e ExtBeginsAfterEndsHere_SYNERROR
@e ExtEndsWithoutBegins_SYNERROR
@e ExtMultipleEndsHere_SYNERROR
=
<dividing-sentence> ::=
<if-start-of-paragraph> <heading> | ==> R[2]
<extension-end-marker-sentence> ==> R[1]
<heading> ::=
volume ... | ==> 1
book ... | ==> 2
part ... | ==> 3
chapter ... | ==> 4
section ... ==> 5
<extension-end-marker-sentence> ::=
... begin/begins here | ==> -1; @<Check we can begin an extension here@>;
... end/ends here ==> -2; @<Check we can end an extension here@>;
@<Check we can begin an extension here@> =
switch (sfsm_extension_position) {
case 1: sfsm_extension_position++; break;
case 2: SYNTAX_PROBLEM_HANDLER(ExtMultipleBeginsHere_SYNERROR, W, sfsm_extension, 0); break;
case 3: SYNTAX_PROBLEM_HANDLER(ExtBeginsAfterEndsHere_SYNERROR, W, sfsm_extension, 0); break;
}
@<Check we can end an extension here@> =
switch (sfsm_extension_position) {
case 1: SYNTAX_PROBLEM_HANDLER(ExtEndsWithoutBegins_SYNERROR, W, sfsm_extension, 0); break;
case 2: sfsm_extension_position++; break;
case 3: SYNTAX_PROBLEM_HANDLER(ExtMultipleEndsHere_SYNERROR, W, sfsm_extension, 0); break;
}
@<Detect a dividing sentence@> =
	if (<dividing-sentence>(W)) {
		/* A result of -1 means "begins here" and -2 means "ends here"; these are
		noted only when the extension position is positive. Any other result is
		taken to be a heading level. */
		int outcome = <<r>>;
		if (outcome == -1) {
			if (sfsm_extension_position > 0) begins_or_ends = 1;
		} else if (outcome == -2) {
			if (sfsm_extension_position > 0) begins_or_ends = -1;
		} else {
			heading_level = outcome;
		}
	}
@ Structural sentences are defined as follows. (The asterisk notation isn't
known to most Inform users: it increases output to the debugging log.)
@e BIBLIOGRAPHIC_NT /* For the initial title sentence */
@e ROUTINE_NT /* "Instead of taking something, ..." */
@e INFORM6CODE_NT /* "Include (- ... -)" */
@e TABLE_NT /* "Table 1 - Counties of England" */
@e EQUATION_NT /* "Equation 2 - Newton's Second Law" */
@e TRACE_NT /* A sentence consisting of an asterisk and optional quoted text */
=
<structural-sentence> ::=
<if-start-of-source-text> <quoted-text> | ==> 0; ssnt = BIBLIOGRAPHIC_NT;
<if-start-of-source-text> <quoted-text> ... | ==> 0; ssnt = BIBLIOGRAPHIC_NT;
<language-modifying-sentence> | ==> R[1]
* | ==> 0; ssnt = TRACE_NT;
* <quoted-text-without-subs> | ==> 0; ssnt = TRACE_NT;
<if-start-of-paragraph> table ... | ==> 0; ssnt = TABLE_NT;
<if-start-of-paragraph> equation ... | ==> 0; ssnt = EQUATION_NT;
include <nounphrase-articled> by <nounphrase> | ==> 0; ssnt = INCLUDE_NT; *XP = RP[1]; ((parse_node *) RP[1])->next = RP[2];
include (- ... ==> 0; ssnt = INFORM6CODE_NT;
@ Properly speaking, despite the definition above, language modifying sentences
are nonstructural. So what are they doing here? The answer is that we need to
read them early on, because they affect the way that we parse all other
sentences. Whereas other nonstructural sentences can wait, these can't.
=
<language-modifying-sentence> ::=
include (- ### in the preform grammar | ==> -2; ssnt = INFORM6CODE_NT;
use ... language element/elements ==> -1