#include #include "BraceExpander.h" #include "errors.h" #include "symbols.h" #include "types.h" /*------------------------------------------------------------------------------ Expand brace expressions within each token in a list. Each brace expression consists of zero or more components, separated by a LIST_SEPARATOR character. Each component can be a range of values or a string that may include another brace expression. Version 1.0 2014-12-16 Brian B. McGuinness ------------------------------------------------------------------------------*/ /*------------------------------------------------------------------------------ append_string_to_tokens - Append a string to the current set of tokens, or add it as a separate token if there are no current tokens yet. list = The list of tokens containing the current set at the end startAt = The index where the current set of tokens begins in the list suffix = The string to append We return the updated list of tokens. ------------------------------------------------------------------------------*/ void BraceExpander::append_string_to_tokens (std::vector &list, int startAt, String suffix) { int length = list.size(); if (length > startAt) { for (int n = startAt; n < length; n++) { list.at (n).append (suffix); } } else list.push_back (suffix); } /*------------------------------------------------------------------------------ compare_strings - Compare two strings, ignoring leading zeros First, we scan past leading zeros, if any, in each string. Then we compare the remainder of the strings. If one string is longer, it is deemed greater. Otherwise, we do a lexical comparison. string1 = The first string to be compared string2 = The second string to be compared We return an integer value that is < 0 if string1 < string2, 0 if string1 = string2, or > 0 if string1 > string2. ------------------------------------------------------------------------------*/ int BraceExpander::compare_strings (const String &string1, const String &string2) { size_t length1 = string1.length(); size_t length2 = string2.length(); // Scan past leading zeros, as they have no value size_t index1 = 0, index2 = 0; while (string1[index1] == '0' && index1 < length1) index1++; while (string2[index2] == '0' && index2 < length2) index2++; // A longer string is deemed greater length1 -= index1; length2 -= index2; if (length1 < length2) return -1; if (length1 > length2) return 1; // They're the same length, so perform a lexical comparison return string1.compare (index1, String::npos, string2, index2, String::npos); } /*------------------------------------------------------------------------------ expandBraces - Expand brace expressions within each token in a list. tokens = The list of tokens We return the list of tokens with the brace expressions expanded. ------------------------------------------------------------------------------*/ struct ExpansionData { int part_starts_at; // Index in the expansion list where the current component starts std::vector expansions; // Tokens generated by expansions ExpansionData (int starts_at) { part_starts_at = starts_at; } }; std::vector BraceExpander::expandBraces (std::vector &tokens) { const Char UNQUOTED = ' '; std::vector result; for (String token : tokens) { // Ignore isolated braces; these open blocks if (token.length() == 1 && token[0] == BRACE_BEGIN) { result.push_back (token); continue; } String buffer; Char previous_char = ' '; Char previous_state = UNQUOTED; int range_break = 0; std::vector stack; int stack_top = 0; Char state = UNQUOTED; int token_length = token.length(); stack.push_back (ExpansionData (0)); for (int i = 0; i < token_length; i++) { char c = token.at (i); switch (state) { case UNQUOTED: // Looking for the next brace expression switch (c) { case BRACE_BEGIN: if (range_break != 0) throw SyntaxError (token.insert (0, "Braces are not allowed in a range: ").c_str()); stack_top++; stack.push_back (ExpansionData (stack[stack_top - 1].expansions.size())); if (buffer.length() > 0) { append_string_to_tokens (stack[stack_top - 1].expansions, stack[stack_top - 1].part_starts_at, buffer); } buffer.clear(); previous_char = ' '; break; case BRACE_END: if (stack.empty()) throw SyntaxError (token.insert (0, "Unpaired closing brace: ").c_str()); // fall through case LIST_SEPARATOR: // End of a component: expand it if (stack.empty()) { buffer.push_back (c); break; } if (range_break != 0) { // This component is a range String start = buffer.substr (0, range_break); String end = buffer.substr (range_break + 2); for (String value = start; compare_strings (value, end) <= 0; value = increment_string (value, end)) { stack[stack_top].expansions.push_back (value); } range_break = 0; } else { // This component is a simple string append_string_to_tokens (stack[stack_top].expansions, stack[stack_top].part_starts_at, buffer); } buffer.clear (); stack[stack_top].part_starts_at = stack[stack_top].expansions.size(); if (c == BRACE_END) { // Apply this brace level's expansions to the previous brace level int start = stack[stack_top - 1].part_starts_at; int length = stack[stack_top - 1].expansions.size(); if (length > start) { std::vector newList; for (int n = 0; n < start; n++) newList.push_back (stack[stack_top - 1].expansions.at (n)); for (int n = start; n < length; n++) { for (String end: stack[stack_top].expansions) { newList.push_back (stack[stack_top - 1].expansions.at (n) + end); } } stack[stack_top - 1].expansions = newList; } else { for (String expansion : stack[stack_top].expansions) { stack[stack_top - 1].expansions.push_back (expansion); } } stack.pop_back(); stack_top--; } break; case RANGE: // Unquoted, unescaped range character buffer.push_back (c); if (previous_char == c && range_break == 0) range_break = buffer.length() - 2; break; case ESCAPE: previous_state = UNQUOTED; state = ESCAPE; buffer.push_back (c); break; case STRONG_QUOTE: case WEAK_QUOTE: state = c; buffer.push_back (c); break; default: buffer.push_back (c); break; } break; case ESCAPE: // The last character was an escape character buffer.push_back (c); state = previous_state; break; case STRONG_QUOTE: // No brace expansion is done inside quotes case WEAK_QUOTE: buffer.push_back (c); if (c == ESCAPE) { previous_state = state; state = ESCAPE; } else if (c == state) state = UNQUOTED; break; } previous_char = c; } if (stack_top > 0) throw SyntaxError (token.insert (0, "Unpaired opening brace: ").c_str()); // Handle leftover text at the end of a token if (buffer.length() > 0) { if (stack[0].expansions.size() > 0) { // It's a suffix for a brace expansion String suffix = buffer; for (String expansion : stack[0].expansions) { result.push_back (expansion + suffix); } } else result.push_back (buffer); // It's a token with no brace expansions } // There's no leftover text to append, so just save the expansions for this token else for (String expansion : stack[0].expansions) result.push_back (expansion); } return result; } /*------------------------------------------------------------------------------ increment_string - Increment a string by 1, with carry. Starting with the rightmost character, we increment the character by 1 (replace it with its successor in the collating sequence). When we reach '9' (for a digit character) or 'z' or 'Z' (for an alphabetic character) we wrap around (to '0', 'a', or 'A') and implement carry by incrementing the next character to the left in the same way. The incrementing process skips over characters that are neither digits not letters, leaving them unaltered. When a carry results in our prefixing a new character, we check the pattern to determine whether to prefix a digit, a letter, or something else. The upper bound of a sequence of strings is often used as the pattern. That way, we build a character sequence that matches the final result. Alternatively, "" can be used if no pattern is desired. In that case we prefix a character of the same type as the leftmost character in the current value. string = The string to be incremented pattern = The pattern to use for prefixing new characters (e.g. "0cd-4b") We return the incremented string. ------------------------------------------------------------------------------*/ Char BraceExpander::SUCCESSOR[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 65, 91, 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 97, 123, 124, 125, 126, 127 }; String &BraceExpander::increment_string (String &value, const String &pattern) { size_t value_length = value.length(); for (int index = value_length - 1; index >= 0; index--) { Char current = value[index]; Char next = (current < 128) ? SUCCESSOR[current] : current; value[index] = next; if (next > current) break; // Perform a carry if (index == 0) { // We need to prefix a new digit size_t pattern_length = pattern.length(); if (pattern_length > value_length) { current = pattern[pattern_length - value_length - 1]; } if (isdigit (current)) current = '1'; else if (islower (current)) current = 'a'; else if (isupper (current)) current = 'A'; value.insert (0, 1, current); break; } } return value; }