[IndexUtilities::] Indexing Utilities. Some conveniences shared by our different forms of index. @ A temporary measure. @d LETTER_ALPHABETIZATION 1 @d WORD_ALPHABETIZATION 2 @d WRAPPER_none 1 @d WRAPPER_epub 2 @d WRAPPER_zip 3 = (early code) int indoc_settings_test_index_mode = FALSE; int indoc_settings_index_alphabetisation_algorithm = LETTER_ALPHABETIZATION; int indoc_settings_navigation_simplified_letter_rows = FALSE; int indoc_settings_wrapper = WRAPPER_none; @ = void IndexUtilities::general_link(OUTPUT_STREAM, text_stream *cl, text_stream *to, text_stream *text) { HTML::begin_link_with_class(OUT, cl, to); WRITE("%S", text); HTML::end_link(OUT); } @ = void IndexUtilities::escape_HTML_characters_in(text_stream *text) { TEMPORARY_TEXT(modified) for (int i=0, L=Str::len(text); i': WRITE_TO(modified, ">"); break; case '&': if (Str::get_at(text, i+1) == '#') { PUT_TO(modified, c); break; } int j = i+1; while (Characters::isalnum(Str::get_at(text, j))) j++; if ((j > i+1) && (Str::get_at(text, j) == ';')) { PUT_TO(modified, c); break; } WRITE_TO(modified, "&"); break; default: PUT_TO(modified, c); break; } } Str::copy(text, modified); DISCARD_TEXT(modified) } @ Span notations allow markup such as |this is *dreadful*| to represent emphasis; and are also used to mark headwords for indexing, as in |this is ^{nifty}|. @d MAX_PATTERN_LENGTH 1024 @d MARKUP_SPP 1 @d INDEX_TEXT_SPP 2 @d INDEX_SYMBOLS_SPP 3 @d WRAPPER_none 1 @d WRAPPER_epub 2 @d WRAPPER_zip 3 = typedef struct span_notation { int sp_purpose; /* one of the |*_SPP| constants */ wchar_t sp_left[MAX_PATTERN_LENGTH]; /* wide C string: the start pattern */ int sp_left_len; wchar_t sp_right[MAX_PATTERN_LENGTH]; /* wide C string: and end pattern */ int sp_right_len; struct text_stream *sp_style; CLASS_DEFINITION } span_notation; void IndexUtilities::add_span_notation(compiled_documentation *cd, text_stream *L, text_stream *R, text_stream *style, int purpose) { span_notation *SN = CREATE(span_notation); SN->sp_style = Str::duplicate(style); Str::copy_to_wide_string(SN->sp_left, L, MAX_PATTERN_LENGTH); Str::copy_to_wide_string(SN->sp_right, R, MAX_PATTERN_LENGTH); SN->sp_left_len = Str::len(L); SN->sp_right_len = Str::len(R); SN->sp_purpose = purpose; ADD_TO_LINKED_LIST(SN, span_notation, cd->id.notations); } @h Alphabetisation. We flatten the casing and remove the singular articles; we count small numbers as words, so that "3 Wise Monkeys" is filed as if it were "Three Wise Monkeys"; with parts of multipart examples, such as "Disappointment Bay 3", we insert a 0 before the 3 so that up to 99 parts can appear and alphabetical sorting will agree with numerical. = dictionary *alphabetisation_exceptions = NULL; /* hash of lemmas with unusual alphabetisations */ void IndexUtilities::alphabetisation_exception(text_stream *term, text_stream *alphabetise_as) { if (alphabetisation_exceptions == NULL) alphabetisation_exceptions = Dictionaries::new(100, TRUE); text_stream *val = Dictionaries::create_text(alphabetisation_exceptions, term); Str::copy(val, alphabetise_as); } void IndexUtilities::improve_alphabetisation(text_stream *sort_key) { text_stream *alph = Dictionaries::get_text(alphabetisation_exceptions, sort_key); if (Str::len(alph) > 0) { Str::copy(sort_key, alph); LOOP_THROUGH_TEXT(pos, sort_key) Str::put(pos, Characters::tolower(Str::get(pos))); } else { LOOP_THROUGH_TEXT(pos, sort_key) Str::put(pos, Characters::tolower(Str::get(pos))); Regexp::replace(sort_key, L"a ", NULL, REP_ATSTART); Regexp::replace(sort_key, L"an ", NULL, REP_ATSTART); Regexp::replace(sort_key, L"the ", NULL, REP_ATSTART); LOOP_THROUGH_TEXT(pos, sort_key) Str::put(pos, Characters::tolower(Characters::remove_wchar_t_accent(Str::get(pos)))); Regexp::replace(sort_key, L"%[ *%]", L"____SQUARES____", REP_REPEATING); Regexp::replace(sort_key, L"%[", NULL, REP_REPEATING); Regexp::replace(sort_key, L"%]", NULL, REP_REPEATING); Regexp::replace(sort_key, L"____SQUARES____", L"[]", REP_REPEATING); Regexp::replace(sort_key, L"%(", NULL, REP_REPEATING); Regexp::replace(sort_key, L"%)", NULL, REP_REPEATING); Regexp::replace(sort_key, L"1 ", L"one ", REP_ATSTART); Regexp::replace(sort_key, L"2 ", L"two ", REP_ATSTART); Regexp::replace(sort_key, L"3 ", L"three ", REP_ATSTART); Regexp::replace(sort_key, L"4 ", L"four ", REP_ATSTART); Regexp::replace(sort_key, L"5 ", L"five ", REP_ATSTART); Regexp::replace(sort_key, L"6 ", L"six ", REP_ATSTART); Regexp::replace(sort_key, L"7 ", L"seven ", REP_ATSTART); Regexp::replace(sort_key, L"8 ", L"eight ", REP_ATSTART); Regexp::replace(sort_key, L"9 ", L"nine ", REP_ATSTART); Regexp::replace(sort_key, L"10 ", L"ten ", REP_ATSTART); Regexp::replace(sort_key, L"11 ", L"eleven ", REP_ATSTART); Regexp::replace(sort_key, L"12 ", L"twelve ", REP_ATSTART); TEMPORARY_TEXT(x) Str::copy(x, sort_key); Str::clear(sort_key); match_results mr = Regexp::create_mr(); while (Regexp::match(&mr, x, L"(%c*?)(%d+)(%c*)")) { WRITE_TO(sort_key, "%S", mr.exp[0]); Str::copy(x, mr.exp[2]); WRITE_TO(sort_key, "%08d", Str::atoi(mr.exp[1], 0)); } WRITE_TO(sort_key, "%S", x); DISCARD_TEXT(x) } } @ = int letters_taken[26]; void IndexUtilities::note_letter(wchar_t c) { int i = c - (wchar_t) 'A'; if ((i>=0) && (i<26)) letters_taken[i] = TRUE; } void IndexUtilities::alphabet_row(OUTPUT_STREAM, int sequence) { switch (sequence) { case 1: for (int i=0; i<26; i++) letters_taken[i] = FALSE; break; case 2: { int faked = FALSE; for (int i=0; i<26; i++) if (letters_taken[i] == FALSE) { if (faked == FALSE) { faked = TRUE; HTML_OPEN("p"); } TEMPORARY_TEXT(singleton) PUT_TO(singleton, 'A'+i); HTML::anchor(OUT, singleton); DISCARD_TEXT(singleton) } if (faked) { HTML_CLOSE("p"); } break; } } if (indoc_settings_navigation_simplified_letter_rows) { HTML_OPEN("p"); } else { HTML_OPEN_WITH("table", "class=\"fullwidth\""); HTML_OPEN("tr"); HTML_OPEN_WITH("td", "class=\"letterinrow\""); } IndexUtilities::general_link(OUT, I"letterlink", I"#A", I"A"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#B", I"B"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#C", I"C"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#D", I"D"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#E", I"E"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#F", I"F"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#G", I"G"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#H", I"H"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#I", I"I"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#J", I"J"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#K", I"K"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#L", I"L"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#M", I"M"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#N", I"N"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#O", I"O"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#P", I"P"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#Q", I"Q"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#R", I"R"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#S", I"S"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#T", I"T"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#U", I"U"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#V", I"V"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#W", I"W"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#X", I"X"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#Y", I"Y"); @; IndexUtilities::general_link(OUT, I"letterlink", I"#Z", I"Z"); if (indoc_settings_navigation_simplified_letter_rows) { HTML_CLOSE("p"); } else { HTML_CLOSE("td"); HTML_CLOSE("tr"); HTML_CLOSE("table"); } } @ = if (indoc_settings_navigation_simplified_letter_rows) WRITE(" / "); else { HTML_CLOSE("td"); HTML_OPEN_WITH("td", "class=\"letterinrow\""); } @ This is mainly used for the typographically dramatic link letters A, B, C, ... but can also make fatter typographically dramatic headings, if it's stretched in width and a longer text is supplied. = void IndexUtilities::majuscule_heading(OUTPUT_STREAM, text_stream *display_text, int single_letter) { if (indoc_settings_navigation_simplified_letter_rows) { if (single_letter == 1) { HTML::begin_div_with_class_S(OUT, I"majuscule", __FILE__, __LINE__); } else { HTML::begin_div_with_class_S(OUT, I"stretchymajuscule", __FILE__, __LINE__); } HTML_OPEN_WITH("span", "class=\"majusculelettering\""); WRITE("%S", display_text); HTML_CLOSE("span"); HTML::end_div(OUT); } else { WRITE("%S", display_text); } }