Experimental Unicode-handling change

2024-06-26 04:00:43 +03:00 · 2023-05-16 08:11:55 +01:00 · 2023-05-16 08:11:55 +01:00 · da9fb27be5
parent 33c10204bc
commit da9fb27be5
21 changed files with 31 additions and 21 deletions
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Inform 7

-[Version](notes/versioning.md): 10.2.0-beta+6W48 'Krypton' (15 May 2023)
+[Version](notes/versioning.md): 10.2.0-beta+6W49 'Krypton' (16 May 2023)

 ## About Inform

--- a/build.txt
+++ b/build.txt
@@ -1,3 +1,3 @@
 Prerelease: beta
-Build Date: 15 May 2023
-Build Number: 6W48
+Build Date: 16 May 2023
+Build Number: 6W49
--- a/inbuild/supervisor-module/Chapter
+++ b/inbuild/supervisor-module/Chapter
@@ -171,7 +171,7 @@ by the local |\n| for good measure.

@<Read the titling line of the extension and normalise its casing@> =
 	int c;
-	while ((c = TextFiles::utf8_fgetc(EXTF, NULL, FALSE, NULL)) != EOF) {
+	while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != EOF) {
 		if (c == 0xFEFF) continue; /* skip the optional Unicode BOM pseudo-character */
 		if ((c == '\x0a') || (c == '\x0d') || (c == '\n')) break;
 		PUT_TO(titling_line, c);
@@ -188,7 +188,7 @@ thing we read here is a meaningless |0D|.

@<Read the rubric text, if any is present@> =
 	int c, found_start = FALSE;
-	while ((c = TextFiles::utf8_fgetc(EXTF, NULL, FALSE, NULL)) != EOF) {
+	while ((c = TextFiles::utf8_fgetc(EXTF, NULL, NULL)) != EOF) {
 		if ((c == '\x0a') || (c == '\x0d') || (c == '\n') || (c == '\t')) c = ' ';
 		if ((c != ' ') && (found_start == FALSE)) {
 			if (c == '"') found_start = TRUE;
--- a/inbuild/supervisor-module/Chapter
+++ b/inbuild/supervisor-module/Chapter
@@ -1187,7 +1187,7 @@ the whole thing goes into |bibliographic_sentence| and |bracketed| is empty.

@<Capture the opening sentence and its bracketed part@> =
 	int c, commented = FALSE, quoted = FALSE, rounded = FALSE, content_found = FALSE;
-	while ((c = TextFiles::utf8_fgetc(SF, NULL, FALSE, NULL)) != EOF) {
+	while ((c = TextFiles::utf8_fgetc(SF, NULL, NULL)) != EOF) {
 		if (c == 0xFEFF) continue; /* skip the optional Unicode BOM pseudo-character */
 		if (commented) {
 			if (c == ']') commented = FALSE;
--- a/inbuild/supervisor-module/Chapter
+++ b/inbuild/supervisor-module/Chapter
@@ -43,8 +43,11 @@ source_file *SourceText::read_file(inbuild_copy *C, filename *F, text_stream *sy
 	if (handle) {
 		text_stream *leaf = Filenames::get_leafname(F);
 		if (primary) leaf = I"main source text";
+		int mode = UNICODE_UFBHM;
+		target_vm *vm = Supervisor::current_vm();
+		if (TargetVMs::is_16_bit(vm)) mode = ZSCII_UFBHM;
 		sf = TextFromFiles::feed_open_file_into_lexer(F, handle,
-			leaf, documentation_only, ref);
+			leaf, documentation_only, ref, mode);
 		if (sf == NULL) {
 			Copies::attach_error(C, CopyErrors::new_F(OPEN_FAILED_CE, -1, F));
 		} else {
--- a/inform7/Internal/Inter/BasicInformExtrasKit/kit_metadata.json
+++ b/inform7/Internal/Inter/BasicInformExtrasKit/kit_metadata.json
@@ -2,7 +2,7 @@
    "is": {
        "type": "kit",
        "title": "BasicInformExtrasKit",
-        "version": "10.2.0-beta+6W48"
+        "version": "10.2.0-beta+6W49"
    },
    "kit-details": {
        "has-priority": 1
--- a/inform7/Internal/Inter/BasicInformKit/kit_metadata.json
+++ b/inform7/Internal/Inter/BasicInformKit/kit_metadata.json
@@ -2,7 +2,7 @@
    "is": {
        "type": "kit",
        "title": "BasicInformKit",
-        "version": "10.2.0-beta+6W48"
+        "version": "10.2.0-beta+6W49"
    },
    "needs": [ {
        "unless": {
--- a/inform7/Internal/Inter/CommandParserKit/kit_metadata.json
+++ b/inform7/Internal/Inter/CommandParserKit/kit_metadata.json
@@ -2,7 +2,7 @@
    "is": {
        "type": "kit",
        "title": "CommandParserKit",
-        "version": "10.2.0-beta+6W48"
+        "version": "10.2.0-beta+6W49"
    },
    "needs": [ {
        "need": {
--- a/inform7/Internal/Inter/EnglishLanguageKit/kit_metadata.json
+++ b/inform7/Internal/Inter/EnglishLanguageKit/kit_metadata.json
@@ -2,7 +2,7 @@
    "is": {
        "type": "kit",
        "title": "EnglishLanguageKit",
-        "version": "10.2.0-beta+6W48"
+        "version": "10.2.0-beta+6W49"
    },
    "needs": [ {
        "need": {
--- a/inform7/Internal/Inter/WorldModelKit/kit_metadata.json
+++ b/inform7/Internal/Inter/WorldModelKit/kit_metadata.json
@@ -2,7 +2,7 @@
    "is": {
        "type": "kit",
        "title": "WorldModelKit",
-        "version": "10.2.0-beta+6W48"
+        "version": "10.2.0-beta+6W49"
    },
    "needs": [ {
        "need": {
--- a/inter/Tests/General/_Results_Ideal/Cons.txt
+++ b/inter/Tests/General/_Results_Ideal/Cons.txt
@@ -1,3 +1,4 @@
+!% -Cu
 !% $ZCODE_LESS_DICT_DATA=1;
 !% $OMIT_UNUSED_ROUTINES=1;
 Constant Grammar__Version 2;
--- a/inter/Tests/General/_Results_Ideal/ObjKind.txt
+++ b/inter/Tests/General/_Results_Ideal/ObjKind.txt
@@ -1,3 +1,4 @@
+!% -Cu
 !% $ZCODE_LESS_DICT_DATA=1;
 !% $OMIT_UNUSED_ROUTINES=1;
 Constant Grammar__Version 2;
--- a/inter/Tests/General/_Results_Ideal/Predeclared.txt
+++ b/inter/Tests/General/_Results_Ideal/Predeclared.txt
@@ -1,3 +1,4 @@
+!% -Cu
 !% $ZCODE_LESS_DICT_DATA=1;
 !% $OMIT_UNUSED_ROUTINES=1;
 Constant Grammar__Version 2;
--- a/inter/Tests/General/_Results_Ideal/Primitives.txt
+++ b/inter/Tests/General/_Results_Ideal/Primitives.txt
@@ -1,3 +1,4 @@
+!% -Cu
 !% $ZCODE_LESS_DICT_DATA=1;
 !% $OMIT_UNUSED_ROUTINES=1;
 Constant Grammar__Version 2;
--- a/inter/Tests/General/_Results_Ideal/SimpleKind.txt
+++ b/inter/Tests/General/_Results_Ideal/SimpleKind.txt
@@ -1,3 +1,4 @@
+!% -Cu
 !% $ZCODE_LESS_DICT_DATA=1;
 !% $OMIT_UNUSED_ROUTINES=1;
 Constant Grammar__Version 2;
--- a/inter/Tests/General/_Results_Ideal/SimpleRoutines.txt
+++ b/inter/Tests/General/_Results_Ideal/SimpleRoutines.txt
@@ -1,3 +1,4 @@
+!% -Cu
 !% $ZCODE_LESS_DICT_DATA=1;
 !% $OMIT_UNUSED_ROUTINES=1;
 Constant Grammar__Version 2;
--- a/inter/final-module/Chapter
+++ b/inter/final-module/Chapter
@@ -132,6 +132,7 @@ See the Inform 6 Technical Manual for more on these oddities.
 	CodeGen::deselect(gen, saved);
 	saved = CodeGen::select(gen, ICL_directives_I7CGS);
 	OUT = CodeGen::current(gen);
+	WRITE("!%% -Cu\n");
 	WRITE("!%% $ZCODE_LESS_DICT_DATA=1;\n");
 	if (omit_ur) WRITE("!%% $OMIT_UNUSED_ROUTINES=1;\n");
 	CodeGen::deselect(gen, saved);
--- a/inter/pipeline-module/Chapter
+++ b/inter/pipeline-module/Chapter
@@ -155,7 +155,7 @@ void RunningPipelines::run(pathname *P, inter_pipeline *S, inter_tree *I,
 				@<Work out the filename@>;
 				text_stream text_output_struct;
 				text_stream *T = &text_output_struct;
-				if (STREAM_OPEN_TO_FILE(T, step->ephemera.parsed_filename, ISO_ENC) == FALSE) {
+				if (STREAM_OPEN_TO_FILE(T, step->ephemera.parsed_filename, UTF8_ENC) == FALSE) {
 					PipelineErrors::error(step, "unable to open file named in pipeline step");
 					active = FALSE;
 				} else {
--- a/services/html-module/Chapter
+++ b/services/html-module/Chapter
@@ -296,7 +296,7 @@ void DocReferences::doc_fragment_to(OUTPUT_STREAM, text_stream *fn) {
 	int i = 0;
 	p[0] = 0;
 	while (TRUE) {
-		int c = TextFiles::utf8_fgetc(FRAGMENTS, NULL, FALSE, NULL);
+		int c = TextFiles::utf8_fgetc(FRAGMENTS, NULL, NULL);
 		if (c == EOF) break;
 		if (c == 0xFEFF) continue; /* the Unicode BOM non-character */
 		if (i == MAX_EXTENT_OF_FRAGMENTS) break;
--- a/services/html-module/Chapter
+++ b/services/html-module/Chapter
@@ -115,7 +115,7 @@ int Localisation::stock_from_file(filename *localisation_file, localisation_dict
 }

@<Read next character@> =
-	cr = TextFiles::utf8_fgetc(Input_File, NULL, FALSE, &ufb);
+	cr = TextFiles::utf8_fgetc(Input_File, NULL, &ufb);
 	col++;
 	if ((cr == 10) || (cr == 13)) { col = 0; nwsol = FALSE; line++; }

--- a/services/words-module/Chapter
+++ b/services/words-module/Chapter
@@ -31,7 +31,7 @@ instance, so they are not similarly converted.

 =
 source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
-	text_stream *leaf, int documentation_only, general_pointer ref) {
+	text_stream *leaf, int documentation_only, general_pointer ref, int mode) {
 	source_file *sf = CREATE(source_file);
 	sf->words_of_source = 0;
 	sf->words_of_quoted_text = 0;
@@ -41,7 +41,7 @@ source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
 	source_location top_of_file;
 	int cr, last_cr, next_cr, read_cr, newline_char = 0;

-	unicode_file_buffer ufb = TextFiles::create_ufb();
+	unicode_file_buffer ufb = TextFiles::create_filtered_ufb(mode);

 	top_of_file.file_of_origin = sf;
 	top_of_file.line_number = 1;
@@ -49,10 +49,10 @@ source_file *TextFromFiles::feed_open_file_into_lexer(filename *F, FILE *handle,
 	Lexer::feed_begins(top_of_file);
 	if (documentation_only) lexer_wait_for_dashes = TRUE;

-	last_cr = ' '; cr = ' '; next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, TRUE, &ufb);
-	if (next_cr == 0xFEFF) next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, TRUE, &ufb); /* Unicode BOM code */
+	last_cr = ' '; cr = ' '; next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb);
+	if (next_cr == 0xFEFF) next_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb); /* Unicode BOM code */
 	if (next_cr != EOF)
-		while (((read_cr = TextFiles::utf8_fgetc(sf->handle, NULL, TRUE, &ufb)), next_cr) != EOF) {
+		while (((read_cr = TextFiles::utf8_fgetc(sf->handle, NULL, &ufb)), next_cr) != EOF) {
 			last_cr = cr; cr = next_cr; next_cr = read_cr;
 			switch(cr) {
 				case '\x0a':
@@ -94,7 +94,7 @@ source_file *TextFromFiles::feed_into_lexer(filename *F, general_pointer ref) {
 	FILE *handle = Filenames::fopen(F, "r");
 	if (handle == NULL) return NULL;
 	source_file *sf = TextFromFiles::feed_open_file_into_lexer(F, handle,
-		Filenames::get_leafname(F), FALSE, ref);
+		Filenames::get_leafname(F), FALSE, ref, UNICODE_UFBHM);
 	fclose(handle);
 	return sf;
 }