#include #include #include "brace_expander.h" #include "errors.h" #include "expansion_data.h" #include "stack.h" #include "symbols.h" // #define DEBUG #ifdef DEBUG #include #define REPORT(...) printf(__VA_ARGS__) #else #define REPORT(...) #endif /*------------------------------------------------------------------------------ Expand brace expressions within a token. A token may contain one or more embedded brace expressions. Each brace expression consists of zero or more components, separated by a LIST_SEPARATOR character. Each component can be a range of values, or a string that may include another brace expression. Version 1.0 2014-12-16 Brian B. McGuinness ------------------------------------------------------------------------------*/ #ifdef DEBUG /*------------------------------------------------------------------------------ list_stack_contents - Display the contents of a stack ------------------------------------------------------------------------------*/ void list_stack_contents (expansion_stack *stack) { int i, j; for (i = 0; i <= stack->top; i++) { expansion_data *expansions = stack->data[i]; printf ( " Stack level %d, length = %d, current component starts at %d:\n ", i, expansions->expansions->length, expansions->part_starts_at ); for (j = 0; j < expansions->expansions->length; j++) { printf (" '%s'", expansions->expansions->list[j]->text); } printf ("\n"); } } #else #define list_stack_contents(stack) #endif /*------------------------------------------------------------------------------ append_string_to_tokens - Append a string to the current set of tokens, or add it as a separate token if there are no current tokens yet. list = The list of tokens containing the current set at the end start_at = The index where the current set of tokens begins in the list suffix = The string to append We return the updated list of tokens. ------------------------------------------------------------------------------*/ static void append_string_to_tokens (string_list **list, int start_at, string *suffix) { int length = (*list)->length; if (length > start_at) { for (int n = start_at; n < length; n++) { REPORT (" -- append_string_to_tokens: append '%s' to '%s'\n", &(*list)->list[n]->text, suffix->text); append_string (&(*list)->list[n], suffix); } } else append_string_to_list (list, suffix); } /*------------------------------------------------------------------------------ compare_strings - Compare two strings First, we scan past leading zeros, if any, in each string. Then we compare the remainder of the strings. If one string is longer, it is deemed greater. Otherwise, we do a lexical comparison. string1 = The first string to be compared string2 = The second string to be compared We return an integer value that is < 0 if string1 < string2, 0 if string1 = string2, or > 0 if string1 > string2. ------------------------------------------------------------------------------*/ int compare_strings (character *string1, character *string2) { if (string1 == NULL) return (string2 == NULL) ? 0 : -1; else if (string2 == NULL) return 1; // Scan past leading zeros, as they have no value while (*string1 == '0') string1++; while (*string2 == '0') string2++; // Note: size_t is unsigned size_t length1 = STRLEN (string1); size_t length2 = STRLEN (string2); // A longer string is deemed greater if (length1 < length2) return -1; if (length1 > length2) return 1; return STRCOMP (string1, string2); } /*------------------------------------------------------------------------------ convert_list_to_array - Convert a string list to an array of character arrays ------------------------------------------------------------------------------*/ character **convert_list_to_array (string_list *strlist) { character **tokens = malloc ((strlist->length + 1) * sizeof (character *)); int i; for (i = 0; i < strlist->length; i++) { REPORT ("Copying token %d\n", i); tokens[i] = get_character_array (strlist->list[i]); REPORT ("Got '%s'\n", tokens[i]); } tokens[strlist->length] = NULL; return tokens; } /*------------------------------------------------------------------------------ expand_braces - Expand brace expressions within a token. token = The token to be expanded We return the list of brace expansions of the token. A NULL pointer follows the last expansion in the list. ------------------------------------------------------------------------------*/ #define UNQUOTED ' ' character **expand_braces (character *token) { if (token[0] == BRACE_BEGIN && token[1] == '\0') { // Ignore isolated braces; these open blocks character **result = malloc (2 * sizeof (character *)); result[0] = token; result[0] = NULL; } else { string *buffer = new_string (); character previous_char = ' '; character previous_state = UNQUOTED; int range_break = 0; string_list *result = new_string_list (); expansion_stack *stack = new_stack (); character state = UNQUOTED; push (&stack, new_expansion_data (0)); #ifdef DEBUG printf (" Initialized stack:\n"); list_stack_contents (stack); // DEBUG #endif for (character *cp = token; *cp != '\0'; cp++) { character c = *cp; switch (state) { case UNQUOTED: // Looking for the next brace expression switch (c) { case BRACE_BEGIN: REPORT ("UNQUOTED BRACE_BEGIN: %c\n", c); if (range_break != 0) { report_error ("Braces are not allowed in a range: %s", token); continue; } REPORT ("push %d\n", stack->data[stack->top]->expansions->length); push (&stack, new_expansion_data (stack->data[stack->top]->expansions->length)); if (buffer->length > 0) { append_string_to_tokens ( &stack->data[stack->top - 1]->expansions, stack->data[stack->top - 1]->part_starts_at, buffer ); list_stack_contents (stack); // DEBUG } clear_string (buffer); previous_char = ' '; break; case BRACE_END: REPORT ("UNQUOTED BRACE_END: %c\n", c); if (stack->top < 1) { report_error ("Unpaired closing brace: %s", token); continue; } // fall through case LIST_SEPARATOR: // End of a component: expand it REPORT ("UNQUOTED LIST_SEPARATOR: %c\n", c); if (stack->top < 1) { append_char (&buffer, c); break; } if (range_break != 0) { // This component is a range string *start = substring (buffer, 0, range_break); string *end = substring (buffer, range_break + 2, buffer->length); REPORT (" range: '%s' to '%s'\n", start->text, end->text); for (string *value = start; compare_strings (value->text, end->text) <= 0; value = increment_string (value, end)) { REPORT (" next value = '%s'\n", value->text); append_string_to_list (&stack->data[stack->top]->expansions, value); } list_stack_contents (stack); // DEBUG delete_string (end); delete_string (start); range_break = 0; } else { // This component is a simple string REPORT (" simple string: '%s'\n", buffer->text); append_string_to_tokens ( &stack->data[stack->top]->expansions, stack->data[stack->top]->part_starts_at, buffer ); list_stack_contents (stack); // DEBUG } clear_string (buffer); stack->data[stack->top]->part_starts_at = stack->data[stack->top]->expansions->length; if (c == BRACE_END) { REPORT ("UNQUOTED LIST_SEPARATOR: process BRACE_END\n"); // Apply this brace level's expansions to the previous brace level int start = stack->data[stack->top - 1]->part_starts_at; int length = stack->data[stack->top - 1]->expansions->length; REPORT (" # expansions = %d, start at %d, stack top = %d\n", length, start, stack->top); if (length > start) { REPORT (" Create new list\n"); string_list *new_list = new_string_list (); for (int n = 0; n < start; n++) { REPORT (" [%2d]: '%s'\n", n, stack->data[stack->top - 1]->expansions->list[n]->text); append_string_to_list (&new_list, stack->data[stack->top - 1]->expansions->list[n]); } for (int n = start; n < length; n++) { for (int m = 0; m < stack->data[stack->top]->expansions->length; m++) { REPORT (" Create new string\n"); string *new = clone_string (stack->data[stack->top - 1]->expansions->list[n]); append_char_array (&new, stack->data[stack->top]->expansions->list[m]->text); REPORT (" [%2d]: '%s'\n", m, new->text); append_string_to_list (&new_list, new); delete_string (new); } } delete_string_list (stack->data[stack->top - 1]->expansions); stack->data[stack->top - 1]->expansions = new_list; list_stack_contents (stack); // DEBUG } else { REPORT (" Append expansions to list; # expansions = %d\n", stack->data[stack->top]->expansions->length); for (int m = 0; m < stack->data[stack->top]->expansions->length; m++) { REPORT (" [%2d]: '%s'\n", m, stack->data[stack->top]->expansions->list[m]); append_string_to_list ( &stack->data[stack->top - 1]->expansions, stack->data[stack->top]->expansions->list[m] ); } list_stack_contents (stack); // DEBUG } pop (&stack); } break; case RANGE: // Unquoted, unescaped range character REPORT ("RANGE: c = %c\n", c); append_char (&buffer, c); if (previous_char == c && range_break == 0) range_break = buffer->length - 2; break; case ESCAPE: REPORT ("ESCAPE: c = %c\n", c); append_char (&buffer, c); previous_state = UNQUOTED; state = ESCAPE; break; case STRONG_QUOTE: case WEAK_QUOTE: REPORT ("QUOTE: c = %c\n", c); state = c; append_char (&buffer, c); break; default: append_char (&buffer, c); break; } break; case ESCAPE: // The last character was an escape character REPORT ("ESCAPE: c = %c\n", c); append_char (&buffer, c); state = previous_state; break; case STRONG_QUOTE: // No brace expansion is done inside quotes case WEAK_QUOTE: REPORT ("QUOTE: c = %c\n", c); append_char (&buffer, c); if (c == ESCAPE) { previous_state = state; state = ESCAPE; } else if (c == state) state = UNQUOTED; break; } previous_char = c; } // end of token REPORT ("At end of token\n"); if (stack->top > 0) { report_error ("Unpaired opening brace: %s", token); } // Handle leftover text at the end of a token if (buffer->length > 0) { REPORT ("Processing leftover text at end of token\n"); if (stack->data[0]->expansions->length > 0) { // It's a suffix for a brace expansion for (int i = 0; i < stack->data[0]->expansions->length; i++) { append_string (&stack->data[0]->expansions->list[i], buffer); append_string_to_list (&result, stack->data[0]->expansions->list[i]); } } else append_string_to_list (&result, buffer); // It's a token with no brace expansions } // There's no leftover text to append, so just save the expansions for this token else for (int i = 0; i < stack->data[0]->expansions->length; i++) { REPORT ("Appending expansion %d to result: '%s'\n", i, stack->data[0]->expansions->list[i]->text); append_string_to_list (&result, stack->data[0]->expansions->list[i]); } delete_stack (stack); REPORT ("Copying %d token(s) to array\n", result->length); character **tokens = convert_list_to_array (result); delete_string_list (result); return tokens; } } /*------------------------------------------------------------------------------ increment_string - Increment an arbitrary character string by 1, with carry Starting with the rightmost character, we increment the character by 1 (replace it with its successor in the collating sequence). When we reach '9' (for a digit character) or 'z' or 'Z' (for an alphabetic character) we wrap around (to '0', 'a', or 'A') and implement carry by incrementing the next character to the left in the same way. The incrementing process skips over characters that are neither digits not letters, leaving them unaltered. When a carry results in our prefixing a new character, we check the pattern to determine whether to prefix a digit, a letter, or something else. The upper bound of a sequence of strings is often used as the pattern. That way, we build a character sequence that matches the final result. Alternatively, NULL can be used if no pattern is desired. In that case we prefix a character of the same type as the leftmost character in the current value. string = The string to be incremented pattern = The pattern to copy, or NULL buffer = A buffer to place the incremented string in buflen = The length of the buffer, in characters We return NULL on error, otherwise we return a pointer to the incremented string. ------------------------------------------------------------------------------*/ character SUCCESSOR[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 65, 91, 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 97, 123, 124, 125, 126, 127 }; string *increment_string (string *str, string *pattern) { if (str == NULL) return NULL; character *current; character last; for (current = &str->text[str->length - 1]; current >= str->text; current--) { last = *current; *current = (*current < 128) ? SUCCESSOR[*current] : *current; if (*current > last) break; } if (current < str->text) { // We need to prefix a character character prefix = *str->text; if (pattern != NULL) { int position = str->text + str->length - current; if (pattern->length >= position) { prefix = pattern->text[pattern->length - position]; } } if (isdigit (prefix)) prefix = '1'; else if (islower (prefix)) prefix = 'a'; else if (isupper (prefix)) prefix = 'A'; prefix_char (&str, prefix); } return str; }