1
0
Fork 0
mirror of https://github.com/ganelson/inform.git synced 2024-06-17 07:40:47 +03:00

Experimental Unicode-handling change

This commit is contained in:
Graham Nelson 2023-05-16 08:11:55 +01:00
parent 33c10204bc
commit da9fb27be5
21 changed files with 31 additions and 21 deletions

View file

@ -1,6 +1,6 @@
# Inform 7
[Version](notes/versioning.md): 10.2.0-beta+6W48 'Krypton' (15 May 2023)
[Version](notes/versioning.md): 10.2.0-beta+6W49 'Krypton' (16 May 2023)
## About Inform

View file

@ -1,3 +1,3 @@
Prerelease: beta
Build Date: 15 May 2023
Build Number: 6W48
Build Date: 16 May 2023
Build Number: 6W49

View file

@ -171,7 +171,7 @@ by the local |\n| for good measure.
@<Read the titling line of the extension and normalise its casing@> =
int c;
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, FALSE, NULL)) != EOF) {
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != EOF) {
if (c == 0xFEFF) continue; /* skip the optional Unicode BOM pseudo-character */
if ((c == '\x0a') || (c == '\x0d') || (c == '\n')) break;
PUT_TO(titling_line, c);
@ -188,7 +188,7 @@ thing we read here is a meaningless |0D|.
@<Read the rubric text, if any is present@> =
int c, found_start = FALSE;
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, FALSE, NULL)) != EOF) {
while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != EOF) {
if ((c == '\x0a') || (c == '\x0d') || (c == '\n') || (c == '\t')) c = ' ';
if ((c != ' ') && (found_start == FALSE)) {
if (c == '"') found_start = TRUE;

View file

@ -1187,7 +1187,7 @@ the whole thing goes into |bibliographic_sentence| and |bracketed| is empty.
@<Capture the opening sentence and its bracketed part@> =
int c, commented = FALSE, quoted = FALSE, rounded = FALSE, content_found = FALSE;
while ((c = TextFiles::utf8_fgetc(SF, NULL, FALSE, NULL)) != EOF) {
while ((c = TextFiles::utf8_fgetc(SF, NULL, NULL)) != EOF) {
if (c == 0xFEFF) continue; /* skip the optional Unicode BOM pseudo-character */
if (commented) {
if (c == ']') commented = FALSE;

View file

@ -43,8 +43,11 @@ source_file *SourceText::read_file(inbuild_copy *C, filename *F, text_stream *sy
if (handle) {
text_stream *leaf = Filenames::get_leafname(F);
if (primary) leaf = I"main source text";
int mode = UNICODE_UFBHM;
target_vm *vm = Supervisor::current_vm();
if (TargetVMs::is_16_bit(vm)) mode = ZSCII_UFBHM;
sf = TextFromFiles::feed_open_file_into_lexer(F, handle,
leaf, documentation_only, ref);
leaf, documentation_only, ref, mode);
if (sf == NULL) {
Copies::attach_error(C, CopyErrors::new_F(OPEN_FAILED_CE, -1, F));
} else {

View file

@ -2,7 +2,7 @@
"is": {
"type": "kit",
"title": "BasicInformExtrasKit",
"version": "10.2.0-beta+6W48"
"version": "10.2.0-beta+6W49"
},
"kit-details": {
"has-priority": 1

View file

@ -2,7 +2,7 @@
"is": {
"type": "kit",
"title": "BasicInformKit",
"version": "10.2.0-beta+6W48"
"version": "10.2.0-beta+6W49"
},
"needs": [ {
"unless": {

View file

@ -2,7 +2,7 @@
"is": {
"type": "kit",
"title": "CommandParserKit",
"version": "10.2.0-beta+6W48"
"version": "10.2.0-beta+6W49"
},
"needs": [ {
"need": {

View file

@ -2,7 +2,7 @@
"is": {
"type": "kit",
"title": "EnglishLanguageKit",
"version": "10.2.0-beta+6W48"
"version": "10.2.0-beta+6W49"
},
"needs": [ {
"need": {

View file

@ -2,7 +2,7 @@
"is": {
"type": "kit",
"title": "WorldModelKit",
"version": "10.2.0-beta+6W48"
"version": "10.2.0-beta+6W49"
},
"needs": [ {
"need": {

View file

@ -1,3 +1,4 @@
!% -Cu
!% $ZCODE_LESS_DICT_DATA=1;
!% $OMIT_UNUSED_ROUTINES=1;
Constant Grammar__Version 2;

View file

@ -1,3 +1,4 @@
!% -Cu
!% $ZCODE_LESS_DICT_DATA=1;
!% $OMIT_UNUSED_ROUTINES=1;
Constant Grammar__Version 2;

View file

@ -1,3 +1,4 @@
!% -Cu
!% $ZCODE_LESS_DICT_DATA=1;
!% $OMIT_UNUSED_ROUTINES=1;
Constant Grammar__Version 2;

View file

@ -1,3 +1,4 @@
!% -Cu
!% $ZCODE_LESS_DICT_DATA=1;
!% $OMIT_UNUSED_ROUTINES=1;
Constant Grammar__Version 2;

View file

@ -1,3 +1,4 @@
!% -Cu
!% $ZCODE_LESS_DICT_DATA=1;
!% $OMIT_UNUSED_ROUTINES=1;
Constant Grammar__Version 2;

View file

@ -1,3 +1,4 @@
!% -Cu
!% $ZCODE_LESS_DICT_DATA=1;
!% $OMIT_UNUSED_ROUTINES=1;
Constant Grammar__Version 2;

View file

@ -132,6 +132,7 @@ See the Inform 6 Technical Manual for more on these oddities.
CodeGen::deselect(gen, saved);
saved = CodeGen::select(gen, ICL_directives_I7CGS);
OUT = CodeGen::current(gen);
WRITE("!%% -Cu\n");
WRITE("!%% $ZCODE_LESS_DICT_DATA=1;\n");
if (omit_ur) WRITE("!%% $OMIT_UNUSED_ROUTINES=1;\n");
CodeGen::deselect(gen, saved);

View file

@ -155,7 +155,7 @@ void RunningPipelines::run(pathname *P, inter_pipeline *S, inter_tree *I,
@<Work out the filename@>;
text_stream text_output_struct;
text_stream *T = &text_output_struct;
if (STREAM_OPEN_TO_FILE(T, step->ephemera.parsed_filename, ISO_ENC) == FALSE) {
if (STREAM_OPEN_TO_FILE(T, step->ephemera.parsed_filename, UTF8_ENC) == FALSE) {
PipelineErrors::error(step, "unable to open file named in pipeline step");
active = FALSE;
} else {

View file

@ -296,7 +296,7 @@ void DocReferences::doc_fragment_to(OUTPUT_STREAM, text_stream *fn) {
int i = 0;
p[0] = 0;
while (TRUE) {
int c = TextFiles::utf8_fgetc(FRAGMENTS, NULL, FALSE, NULL);
int c = TextFiles::utf8_fgetc(FRAGMENTS, NULL, NULL);
if (c == EOF) break;
if (c == 0xFEFF) continue; /* skip the Unicode byte order mark (BOM), U+FEFF */
if (i == MAX_EXTENT_OF_FRAGMENTS) break;

View file

@ -115,7 +115,7 @@ int Localisation::stock_from_file(filename *localisation_file, localisation_dict
}
@<Read next character@> =
cr = TextFiles::utf8_fgetc(Input_File, NULL, FALSE, &ufb);
cr = TextFiles::utf8_fgetc(Input_File, NULL, &ufb);
col++;
if ((cr == 10) || (cr == 13)) { col = 0; nwsol = FALSE; line++; }

View file

@ -31,7 +31,7 @@ instance, so they are not similarly converted.
=
source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
text_stream *leaf, int documentation_only, general_pointer ref) {
text_stream *leaf, int documentation_only, general_pointer ref, int mode) {
source_file *sf = CREATE(source_file);
sf->words_of_source = 0;
sf->words_of_quoted_text = 0;
@ -41,7 +41,7 @@ source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
source_location top_of_file;
int cr, last_cr, next_cr, read_cr, newline_char = 0;
unicode_file_buffer ufb = TextFiles::create_ufb();
unicode_file_buffer ufb = TextFiles::create_filtered_ufb(mode);
top_of_file.file_of_origin = sf;
top_of_file.line_number = 1;
@ -49,10 +49,10 @@ source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
Lexer::feed_begins(top_of_file);
if (documentation_only) lexer_wait_for_dashes = TRUE;
last_cr = ' '; cr = ' '; next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, TRUE, &ufb);
if (next_cr == 0xFEFF) next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, TRUE, &ufb); /* Unicode BOM code */
last_cr = ' '; cr = ' '; next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb);
if (next_cr == 0xFEFF) next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb); /* Unicode BOM code */
if (next_cr != EOF)
while (((read_cr = TextFiles::utf8_fgetc(sf->handle, NULL, TRUE, &ufb)), next_cr) != EOF) {
while (((read_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb)), next_cr) != EOF) {
last_cr = cr; cr = next_cr; next_cr = read_cr;
switch(cr) {
case '\x0a':
@ -94,7 +94,7 @@ source_file *TextFromFiles::feed_into_lexer(filename *F, general_pointer ref) {
FILE *handle = Filenames::fopen(F, "r");
if (handle == NULL) return NULL;
source_file *sf = TextFromFiles::feed_open_file_into_lexer(F, handle,
Filenames::get_leafname(F), FALSE, ref);
Filenames::get_leafname(F), FALSE, ref, UNICODE_UFBHM);
fclose(handle);
return sf;
}