commit 2a949fb613257f464f03d61c3a2d7ea0434c89f9 Author: Gavin Smith Date: Sun Mar 9 16:27:22 2014 +0000 * info-utils.c, variables.c (preprocess_nodes_p): New variable. * info-utils.c (scan_node_contents, init_output_stream) (write_and_advance, underlining_on, underlining_off) (parse_top_node_line): New function and helper functions, based on info-utils.c:info_references_internal. (info_parse_node): Return length of parsed node label. * nodes.h (N_WasRewritten): New preprocessor symbol. diff --git a/info-utils.c b/info-utils.c index 37659d0..d23e971 100644 --- a/info-utils.c +++ b/info-utils.c @@ -58,20 +58,22 @@ static void saven_nodename (char *nodename, int len); static REFERENCE **info_references_internal (char *label, SEARCH_BINDING *binding); -/* Parse the filename and nodename out of STRING. If STRING doesn't - contain a filename (i.e., it is NOT (FILENAME)NODENAME) then set - INFO_PARSED_FILENAME to NULL. The second argument is one of - the PARSE_NODE_* constants. It specifies how to parse the node name: +/* Parse the filename and nodename out of STRING. Return length of node + specification. If STRING doesn't contain a filename (i.e., it is NOT + (FILENAME)NODENAME) then set INFO_PARSED_FILENAME to NULL. The + second argument is one of the PARSE_NODE_* constants. It specifies + how to parse the node name: PARSE_NODE_DFLT Node name stops at LF, `,', `.', or `TAB' PARSE_NODE_SKIP_NEWLINES Node name stops at `,', `.', or `TAB' PARSE_NODE_VERBATIM Don't parse nodename */ -void +int info_parse_node (char *string, int flag) { register int i = 0; + int length = 0; /* Return value */ /* Default the answer. */ save_filename (NULL); @@ -81,7 +83,8 @@ info_parse_node (char *string, int flag) if (!string || !*string) return; - string += skip_whitespace (string); + length = skip_whitespace (string); + string += length; /* Check for (FILENAME)NODENAME. 
*/ if (*string == '(') @@ -92,6 +95,7 @@ info_parse_node (char *string, int flag) i = 0; /* Advance past the opening paren. */ string++; + length++; /* Find the closing paren. Handle nested parens correctly. */ for (bcnt = 0, bfirst = -1; string[i]; i++) @@ -119,13 +123,19 @@ info_parse_node (char *string, int flag) /* Point directly at the nodename. */ string += i; + length += i; if (*string) - string++; + { + string++; + length++; + } } /* Parse out nodename. */ i = skip_node_characters (string, flag); + length += i; + length++; /* skip_node_characters() stops on terminating character */ saven_nodename (string, i); canonicalize_whitespace (info_parsed_nodename); if (info_parsed_nodename && !*info_parsed_nodename) @@ -164,6 +174,7 @@ info_parse_node (char *string, int flag) else info_parsed_line_number = 0; } + return length; } /* Return the node addressed by LABEL in NODE (usually one of "Prev:", @@ -601,6 +612,511 @@ printed_representation (const char *character, size_t len, size_t hpos, /* **************************************************************** */ /* */ +/* Scanning node */ +/* */ +/* **************************************************************** */ + +/* Whether to strip syntax from the text of nodes. 
*/
+int preprocess_nodes_p;
+
+/* State of the output stream used while rewriting a node.  It is reset
+   by init_output_stream and updated only by write_and_advance.  */
+static size_t output_allocated;  /* Bytes allocated at *output_start.  */
+static size_t output_length;     /* Total bytes passed to write_and_advance.  */
+static char **output_start;      /* Where the caller keeps the buffer start.  */
+
+/* Begin a new output stream whose buffer pointer is stored in *O_S.
+   Only the bookkeeping is reset here; nothing is allocated.  */
+static void
+init_output_stream (char **o_s)
+{
+  output_allocated = 0;
+  output_length = 0;
+  output_start = o_s;
+}
+
+/* Copy N bytes from INPUT to *OUTPUT and advance *OUTPUT past them,
+   growing the buffer at *output_start as needed.  Does nothing unless
+   preprocess_nodes_p is set.  */
+static void
+write_and_advance (char **output, char *input, size_t n)
+{
+  /* An empty write must not reach memmove: before the first allocation
+     *OUTPUT may still be a null pointer, and passing a null pointer to
+     memmove is undefined even when the length is zero.  */
+  if (!preprocess_nodes_p || n == 0)
+    return;
+
+  output_length += n;
+  while (output_allocated < output_length)
+    {
+      size_t offset;
+      if (output_allocated == 0)
+        {
+          output_allocated = 8; /* Initial allocation */
+          offset = 0;
+        }
+      else
+        {
+          /* Remember the write position as an offset, because the
+             reallocation below may move the buffer.  */
+          offset = *output - *output_start;
+        }
+
+      *output_start = x2realloc (*output_start, &output_allocated);
+      *output = *output_start + offset;
+    }
+  memmove (*output, input, n);
+  *output += n;
+}
+
+/* ANSI escape codes */
+#define ANSI_UNDERLINING_OFF "\033[24m"
+#define ANSI_UNDERLINING_ON "\033[4m"
+
+/* Turn off underlining by writing ANSI_UNDERLINING_OFF to *OUTPUT.  */
+static void
+underlining_off (char **output)
+{
+  write_and_advance (output, ANSI_UNDERLINING_OFF,
+                     strlen (ANSI_UNDERLINING_OFF));
+}
+
+/* Turn on underlining by writing ANSI_UNDERLINING_ON to *OUTPUT.  */
+static void
+underlining_on (char **output)
+{
+  write_and_advance (output, ANSI_UNDERLINING_ON,
+                     strlen (ANSI_UNDERLINING_ON));
+}
+
+/* Read first line of node and set next, prev and up. Advance INPTR past
+   the first line. 
*/
+/* Fields labelled INFO_PREV_LABEL, INFO_NEXT_LABEL and INFO_UP_LABEL are
+   copied into newly allocated strings in NODE.  Fields labelled
+   INFO_FILE_LABEL or INFO_NODE_LABEL, and unrecognized fields, are
+   skipped.  Pointers for fields that do not appear are left null.  */
+static void
+parse_top_node_line (NODE *node, char **inptr)
+{
+  char *nodeptr = *inptr;
+  char **store_in;
+  int value_length;
+
+  node->next = node->prev = node->up = 0;
+
+  while (1)
+    {
+      /* Null means "do not keep the value of this field".  */
+      store_in = 0;
+
+      nodeptr += skip_whitespace (nodeptr);
+
+      /* Check what field we are looking at */
+      if (!strncmp (nodeptr, INFO_FILE_LABEL, strlen(INFO_FILE_LABEL)))
+        {
+          nodeptr += strlen(INFO_FILE_LABEL);
+        }
+      else if (!strncmp (nodeptr, INFO_NODE_LABEL, strlen(INFO_NODE_LABEL)))
+        {
+          nodeptr += strlen(INFO_NODE_LABEL);
+        }
+      else if (!strncmp (nodeptr, INFO_PREV_LABEL, strlen(INFO_PREV_LABEL)))
+        {
+          nodeptr += strlen(INFO_PREV_LABEL);
+          store_in = &(node->prev);
+        }
+      else if (!strncmp (nodeptr, INFO_NEXT_LABEL, strlen(INFO_NEXT_LABEL)))
+        {
+          nodeptr += strlen(INFO_NEXT_LABEL);
+          store_in = &(node->next);
+        }
+      else if (!strncmp (nodeptr, INFO_UP_LABEL, strlen(INFO_UP_LABEL)))
+        {
+          nodeptr += strlen(INFO_UP_LABEL);
+          store_in = &(node->up);
+        }
+      else
+        {
+          /* Not recognized - code below will skip to next comma */
+        }
+
+      nodeptr += skip_whitespace (nodeptr);
+
+      /* PARSE_NODE_START separates at commas or newlines, so it
+         will work for filenames including full stops. */
+      value_length = skip_node_characters (nodeptr, PARSE_NODE_START);
+
+      if (store_in)
+        {
+          (*store_in) = xmalloc (value_length + 1);
+          memmove (*store_in, nodeptr, value_length);
+          (*store_in) [value_length] = '\0';
+        }
+
+      nodeptr += value_length;
+
+      /* Stop on a NUL without stepping over it, so that a first line
+         that lacks its newline cannot carry us past the end of the
+         text.  Presumably skip_node_characters stops at a NUL too.  */
+      if (*nodeptr == '\0')
+        break;
+
+      if ((*nodeptr) == '\n')
+        {
+          nodeptr++;
+          break;
+        }
+      nodeptr++; /* Point after field terminator */
+    }
+  *inptr = nodeptr;
+}
+
+/* Check if there is a colon on the next line and return its offset.
+   Return -1 if there is no such colon. 
*/
+static int
+colon_after_newline (char *nodeptr)
+{
+  int nl, colon_offset;
+
+  /* Check if a newline intervenes */
+  nl = skip_line (nodeptr);
+  colon_offset = string_in_line (":", nodeptr + nl);
+  if (colon_offset != -1)
+    return nl + colon_offset;   /* Offset is relative to NODEPTR.  */
+  else
+    return -1;
+}
+
+/* Remove syntax from NODE->contents and build list of references
+   in node.
+
+   The first line of the node is consumed by parse_top_node_line and is
+   not copied to the output.  Each menu entry and cross-reference found
+   after it becomes a REFERENCE in the null-terminated array stored in
+   NODE->references.  When preprocess_nodes_p is set, a rewritten copy of
+   the text replaces NODE->contents, NODE->nodelen is updated to match
+   and N_WasRewritten is set in NODE->flags.  */
+void
+scan_node_contents (NODE *node)
+{
+  SEARCH_BINDING s;
+  char *search_string;
+
+  char *nodeptr;
+  char *new_contents = 0, *outptr = 0;
+
+  int found_menu_entry, in_index = 0;
+
+  REFERENCE **refs = NULL;
+  size_t refs_index = 0, refs_slots = 0;
+
+  /* Used to correct line offsets in index entries */
+  int deleted_lines = 0;
+
+  long position;
+
+  init_output_stream (&new_contents);
+
+  /* Initialize refs to point to array of one null pointer in case
+     there are no results.  This way we know if refs has been initialized
+     even if it is empty.  xmalloc is used so that allocation failure is
+     handled the same way as for every other allocation made here.  */
+  refs = xmalloc (sizeof *refs);
+  refs[0] = NULL;
+
+  refs_slots = 1;
+
+  nodeptr = node->contents;
+
+  parse_top_node_line (node, &nodeptr);
+  deleted_lines++;
+
+  /* Search for menu items or cross references in buffer.
+     This is INFO_MENU_LABEL "|" INFO_XREF_LABEL, but
+     with '*' characters escaped. */
+  search_string = "\n\\* Menu:|\\*Note";
+
+  s.buffer = node->contents;
+  s.start = nodeptr - node->contents;
+  s.end = node->nodelen;
+search_again:
+
+  s.flags = S_FoldCase | S_SkipDest;
+
+  while (regexp_search (search_string,
+                        &s, &position, 0) == search_success)
+    {
+      /* Save offset of "*" starting link. When preprocess_nodes is Off,
+         we position the cursor on the * when moving to a link. */
+      int start_of_reference;
+
+      /* Cross-references can be generated by four different Texinfo
+         commands.  @inforef and @xref output "*Note " in Info format,
+         and "See" in HTML and print.  @ref and @pxref output "*note "
+         in Info format, and either nothing at all or "see" in HTML
+         and print.  Unfortunately, there is no easy way to distinguish
+         between these latter two cases.  We must make do with
+         displayed manuals occasionally containing "See see" and the
+         like. */
+      int capital_s;
+
+      int colon_offset;
+      REFERENCE *entry;
+      char *copy_to;
+      char *labelptr;
+      int leading_whitespace;
+      int newline_offset;
+
+      /* Pointer to search result (after match) */
+      copy_to = s.buffer + position;
+
+      /* Was "* Menu:" seen?  If so, search for menu entries hereafter */
+      if (*(copy_to - 1) == ':')
+        {
+          /* This is INFO_MENU_ENTRY_LABEL "|" INFO_XREF_LABEL, but
+             with '*' characters escaped. */
+          search_string = "\n\\* |\\*Note";
+
+          /* Write out up to Menu label, and skip it.  The 8 is the
+             length of the matched text "\n* Menu:".  */
+          copy_to -= 8;
+          write_and_advance (&outptr, nodeptr, copy_to - nodeptr);
+
+          nodeptr = copy_to + 8;
+          deleted_lines++;
+          s.start = nodeptr - s.buffer;
+          continue;
+        }
+
+      /* Check what we found based on last character of match */
+      if (*(copy_to - 1) == ' ')
+        {
+          /* Matched "\n* ": the '*' is two characters back.  */
+          found_menu_entry = 1;
+          start_of_reference = copy_to - node->contents - 2;
+        }
+      else
+        {
+          /* Matched "*Note" in either case: it is five characters long
+             and its 'N' or 'n' is four characters back.  */
+          found_menu_entry = 0;
+
+          capital_s = copy_to[-4] == 'N';
+          start_of_reference = copy_to - node->contents - 5;
+          copy_to -= 5; /* Point to before link */
+        }
+
+      /* Write out up to current reference */
+      write_and_advance (&outptr, nodeptr, copy_to - nodeptr);
+
+      /* Skip notation */
+      if (found_menu_entry)
+        nodeptr = copy_to;
+      else
+        nodeptr = copy_to + 5;
+
+      /* Search forward to ":" to get reference label. */
+      nodeptr += skip_whitespace (nodeptr);
+      colon_offset = string_in_line (":", nodeptr);
+
+      /* Cross-references may have a newline in the middle. */
+      if (colon_offset == -1
+          && !found_menu_entry
+          && (colon_offset = colon_after_newline (nodeptr)) != -1)
+        ;
+      else if (colon_offset == -1)
+        {
+          /* This is not a menu entry or reference.
+             NOTE(review): the text skipped since the last write is not
+             copied to the output on this path - confirm this is
+             intended.  */
+          nodeptr++;
+
+          s.start = nodeptr - s.buffer;
+          continue;
+        }
+      colon_offset--; /* Offset of colon, not character after it. */
+
+      /* We definitely have a reference by this point.  Create
+         REFERENCE entity. */
+      entry = xmalloc (sizeof (REFERENCE));
+      entry->filename = NULL;
+      entry->nodename = NULL;
+      entry->label = NULL;
+      entry->line_number = 0;
+      if (found_menu_entry)
+        entry->type = REFERENCE_MENU_ITEM;
+      else
+        entry->type = REFERENCE_XREF;
+
+      /* Save label as it appears in input. */
+      entry->label = xmalloc(colon_offset + 1);
+      strncpy (entry->label, nodeptr, colon_offset);
+      entry->label[colon_offset] = '\0';
+
+      /* Output reference label and set location of reference. */
+
+      if (!found_menu_entry)
+        {
+          if (capital_s)
+            write_and_advance (&outptr, "See ", 4);
+          else
+            write_and_advance (&outptr, "see ", 4);
+        }
+
+      /* Output any whitespace or newlines before reference label */
+      leading_whitespace = skip_whitespace_and_newlines (entry->label);
+      write_and_advance (&outptr, entry->label, leading_whitespace);
+
+      underlining_on (&outptr);
+
+      /* Point reference to where we will put the displayed reference,
+         which could be after whitespace. */
+      if (preprocess_nodes_p)
+        {
+          entry->start = outptr - new_contents;
+        }
+      else
+        {
+          entry->start = start_of_reference;
+        }
+
+      entry->end = entry->start + strlen (entry->label)
+                   - leading_whitespace;
+
+      /* Write text of label.  If there is a newline in the middle of
+         a reference label, turn off underlining until text starts
+         again. */
+      labelptr = entry->label + leading_whitespace;
+      while (*labelptr != '\n' && *labelptr) labelptr++;
+      newline_offset = labelptr - (entry->label + leading_whitespace);
+
+      write_and_advance(&outptr, entry->label + leading_whitespace,
+                        (*labelptr ? newline_offset :
+                         strlen(entry->label) - leading_whitespace));
+
+      if (*labelptr)
+        {
+          int space_at_start_of_line;
+
+          space_at_start_of_line = skip_whitespace_and_newlines (labelptr);
+
+          /* Note we do this before the newline is output.  This way if
+             the first half of the label is on the bottom line of the
+             screen, underlining will not be left on. */
+          underlining_off (&outptr);
+
+          /* Output newline and any whitespace at start of line */
+          write_and_advance (&outptr, labelptr, space_at_start_of_line);
+          labelptr += space_at_start_of_line;
+
+          underlining_on (&outptr);
+
+          /* Output rest of label */
+          write_and_advance (&outptr, labelptr,
+                             entry->label + strlen(entry->label) - labelptr);
+
+          /* Text of reference ends later in node because of terminal
+             control characters that were output. */
+          if (preprocess_nodes_p)
+            {
+              entry->end += strlen (ANSI_UNDERLINING_ON);
+              entry->end += strlen (ANSI_UNDERLINING_OFF);
+            }
+        }
+
+      underlining_off (&outptr);
+
+      /* We've output the label, so now we can canonicalize it. */
+      canonicalize_whitespace (entry->label);
+
+      /* Now get target of reference. */
+
+      /* Read from after ':' to get target of reference. */
+      nodeptr += colon_offset; nodeptr++;
+
+      /* If this reference entry continues with another ':' then the
+         nodename is the same as the label. */
+      if (*nodeptr == ':')
+        {
+          entry->nodename = xstrdup (entry->label);
+
+          nodeptr++;
+          if (found_menu_entry)
+            {
+              /* Output two spaces to match the length of "::" */
+              write_and_advance (&outptr, "  ", 2);
+            }
+        }
+      else
+        {
+          /* This entry continues with a specific nodename.  Parse the
+             nodename from the specification. */
+
+          int length; /* Length of specification */
+          int i;
+
+          if (found_menu_entry)
+            {
+              length = info_parse_node (nodeptr, PARSE_NODE_DFLT);
+              if (in_index)
+                {
+                  /* For index nodes, output the destination as well,
+                     which will be the name of the node the index entry
+                     refers to. */
+                  write_and_advance (&outptr, nodeptr, length);
+                }
+              nodeptr += length;
+            }
+          else
+            {
+              /* Called for its effect on the info_parsed_* variables
+                 read below; NODEPTR is not advanced, so the specifier
+                 text is copied to the output by a later write.  */
+              length = info_parse_node (nodeptr, PARSE_NODE_SKIP_NEWLINES);
+
+              /* TODO: Check if there is a newline in node specifier.
+                 If so, output a newline and skip whitespace. */
+            }
+
+          if (info_parsed_filename)
+            entry->filename = xstrdup (info_parsed_filename);
+
+          if (info_parsed_nodename)
+            entry->nodename = xstrdup (info_parsed_nodename);
+
+          if (!preprocess_nodes_p)
+            entry->line_number = info_parsed_line_number;
+          else
+            /* Adjust line offset in file to one in displayed text */
+            entry->line_number = info_parsed_line_number - deleted_lines;
+
+          if (found_menu_entry && !in_index)
+            /* Output spaces the length of the node specifier to avoid
+               messing up left edge of second column of menu. */
+            for (i = 0; i < length; i++)
+              write_and_advance (&outptr, " ", 1);
+        }
+      add_pointer_to_array (entry, refs_index, refs, refs_slots, 50);
+
+      s.start = nodeptr - s.buffer;
+      if (s.start >= s.end) break;
+    }
+
+  /* Search may have stopped too early because of null byte
+     in index marker ("^@^H[index^@^H]") or in image marker
+     ("^@^H[image ...^@^H]"), where ^@ is a null byte and ^H is a
+     backspace.  Skip past these and try again. */
+  char *ptr_to_null_byte;
+
+  ptr_to_null_byte = nodeptr + strlen (nodeptr);
+
+  /* Three byte sequence "^@^H[" starts tag.  Check there is enough
+     space for this in the rest of the node. */
+  if (ptr_to_null_byte <= node->contents + node->nodelen - 3)
+    {
+      ptr_to_null_byte += 3;
+
+      /* Output is different for index nodes */
+      if (!strcmp ("index", ptr_to_null_byte))
+        in_index = 1;
+
+      /* Go to second null byte */
+      ptr_to_null_byte += strlen (ptr_to_null_byte);
+
+      /* Three byte sequence "^@^H]" ends tag */
+      if (ptr_to_null_byte <= node->contents + node->nodelen - 3)
+        {
+          /* Write out up to tag. */
+          write_and_advance (&outptr, nodeptr,
+                             ptr_to_null_byte + 3 - nodeptr);
+
+          /* Point to first character after ']' */
+          nodeptr = ptr_to_null_byte + 3;
+
+          /* Restart the search on the remainder of the node.  */
+          s.buffer = nodeptr;
+          s.start = 0;
+          s.end = node->nodelen - (nodeptr - node->contents);
+          goto search_again;
+        }
+    }
+
+  /* If we haven't accidentally gone past the end of the node, write
+     out the rest of it. */
+  if (nodeptr < node->contents + node->nodelen)
+    write_and_advance (&outptr, nodeptr,
+                       (node->contents + node->nodelen) - nodeptr);
+
+  node->references = refs;
+
+  if (preprocess_nodes_p)
+    {
+      node->contents = new_contents;
+      /* Set the flag with OR so that the other flags are kept.  */
+      node->flags |= N_WasRewritten;
+      node->nodelen = outptr - new_contents;
+    }
+}
+ + +/* **************************************************************** */ +/* */ /* Functions Static To This File */ /* */ /* **************************************************************** */ diff --git a/info-utils.h b/info-utils.h index ec3be15..3b5db60 100644 --- a/info-utils.h +++ b/info-utils.h @@ -54,7 +54,7 @@ extern char *info_parsed_nodename; PARSE_NODE_START The STRING argument is retrieved from a node start line, and therefore ends in `,' only. */ -void info_parse_node (char *string, int flag); +int info_parse_node (char *string, int flag); /* Return a NULL terminated array of REFERENCE * which represents the menu found in NODE. If there is no menu in NODE, just return a NULL pointer. */ diff --git a/nodes.h b/nodes.h index 2220784..1bf85b2 100644 --- a/nodes.h +++ b/nodes.h @@ -78,6 +78,7 @@ typedef struct { #define N_CannotGC 0x20 /* File buffer cannot be gc'ed. */ #define N_IsManPage 0x40 /* This node is a manpage. */ #define N_FromAnchor 0x80 /* Synthesized for an anchor reference. */ +#define N_WasRewritten 0x100 /* NODE->contents can be passed to free(). */ /* Internal data structures. 
*/ diff --git a/variables.c b/variables.c index 028099c..480bf8e 100644 --- a/variables.c +++ b/variables.c @@ -89,6 +89,11 @@ VARIABLE_ALIST info_variables[] = { { "search-skip-screen", N_("Skip current window when searching"), &search_skip_screen_p, (char **)on_off_choices }, + + { "preprocess-nodes", + N_("Remove Info file syntax from the text of nodes"), + &preprocess_nodes_p, (char **)on_off_choices }, + { NULL } }; diff --git a/variables.h b/variables.h index e81f01f..43fdc56 100644 --- a/variables.h +++ b/variables.h @@ -63,5 +63,6 @@ extern int ISO_Latin_p; extern int scroll_last_node; extern int min_search_length; extern int search_skip_screen_p; +extern int preprocess_nodes_p; #endif /* not INFO_VARIABLES_H */