// class template regex -*- C++ -*-

// Copyright (C) 2013-2017 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/**
 *  @file bits/regex_scanner.tcc
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */

// FIXME make comments doxygen format.

// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
// and awk
// 1) grep is basic except '\n' is treated as '|'
// 2) egrep is extended except '\n' is treated as '|'
// 3) awk is extended except special escaping rules, and there's no
//    back-reference.
// // References: // // ECMAScript: ECMA-262 15.10 // // basic, extended: // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html // // awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html namespace std _GLIBCXX_VISIBILITY(default) { namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _CharT> _Scanner<_CharT>:: _Scanner(typename _Scanner::_IterT __begin, typename _Scanner::_IterT __end, _FlagT __flags, std::locale __loc) : _ScannerBase(__flags), _M_current(__begin), _M_end(__end), _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_eat_escape(_M_is_ecma() ? &_Scanner::_M_eat_escape_ecma : &_Scanner::_M_eat_escape_posix) { _M_advance(); } template<typename _CharT> void _Scanner<_CharT>:: _M_advance() { if (_M_current == _M_end) { _M_token = _S_token_eof; return; } if (_M_state == _S_state_normal) _M_scan_normal(); else if (_M_state == _S_state_in_bracket) _M_scan_in_bracket(); else if (_M_state == _S_state_in_brace) _M_scan_in_brace(); else { __glibcxx_assert(false); } } // Differences between styles: // 1) "\(", "\)", "\{" in basic. It's not escaping. // 2) "(?:", "(?=", "(?!" in ECMAScript. 
template<typename _CharT> void _Scanner<_CharT>:: _M_scan_normal() { auto __c = *_M_current++; if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) { _M_token = _S_token_ord_char; _M_value.assign(1, __c); return; } if (__c == '\\') { if (_M_current == _M_end) __throw_regex_error( regex_constants::error_escape, "Unexpected end of regex when escaping."); if (!_M_is_basic() || (*_M_current != '(' && *_M_current != ')' && *_M_current != '{')) { (this->*_M_eat_escape)(); return; } __c = *_M_current++; } if (__c == '(') { if (_M_is_ecma() && *_M_current == '?') { if (++_M_current == _M_end) __throw_regex_error( regex_constants::error_paren, "Unexpected end of regex when in an open parenthesis."); if (*_M_current == ':') { ++_M_current; _M_token = _S_token_subexpr_no_group_begin; } else if (*_M_current == '=') { ++_M_current; _M_token = _S_token_subexpr_lookahead_begin; _M_value.assign(1, 'p'); } else if (*_M_current == '!') { ++_M_current; _M_token = _S_token_subexpr_lookahead_begin; _M_value.assign(1, 'n'); } else __throw_regex_error( regex_constants::error_paren, "Invalid special open parenthesis."); } else if (_M_flags & regex_constants::nosubs) _M_token = _S_token_subexpr_no_group_begin; else _M_token = _S_token_subexpr_begin; } else if (__c == ')') _M_token = _S_token_subexpr_end; else if (__c == '[') { _M_state = _S_state_in_bracket; _M_at_bracket_start = true; if (_M_current != _M_end && *_M_current == '^') { _M_token = _S_token_bracket_neg_begin; ++_M_current; } else _M_token = _S_token_bracket_begin; } else if (__c == '{') { _M_state = _S_state_in_brace; _M_token = _S_token_interval_begin; } else if (__c != ']' && __c != '}') { auto __it = _M_token_tbl; auto __narrowc = _M_ctype.narrow(__c, '\0'); for (; __it->first != '\0'; ++__it) if (__it->first == __narrowc) { _M_token = __it->second; return; } __glibcxx_assert(false); } else { _M_token = _S_token_ord_char; _M_value.assign(1, __c); } } // Differences between styles: // 1) different semantics 
of "[]" and "[^]". // 2) Escaping in bracket expr. template<typename _CharT> void _Scanner<_CharT>:: _M_scan_in_bracket() { if (_M_current == _M_end) __throw_regex_error( regex_constants::error_brack, "Unexpected end of regex when in bracket expression."); auto __c = *_M_current++; if (__c == '-') _M_token = _S_token_bracket_dash; else if (__c == '[') { if (_M_current == _M_end) __throw_regex_error(regex_constants::error_brack, "Unexpected character class open bracket."); if (*_M_current == '.') { _M_token = _S_token_collsymbol; _M_eat_class(*_M_current++); } else if (*_M_current == ':') { _M_token = _S_token_char_class_name; _M_eat_class(*_M_current++); } else if (*_M_current == '=') { _M_token = _S_token_equiv_class_name; _M_eat_class(*_M_current++); } else { _M_token = _S_token_ord_char; _M_value.assign(1, __c); } } // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted // literally. So "[]]" and "[^]]" are valid regexes. See the testcases // `*/empty_range.cc`. else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) { _M_token = _S_token_bracket_end; _M_state = _S_state_normal; } // ECMAScript and awk permits escaping in bracket. else if (__c == '\\' && (_M_is_ecma() || _M_is_awk())) (this->*_M_eat_escape)(); else { _M_token = _S_token_ord_char; _M_value.assign(1, __c); } _M_at_bracket_start = false; } // Differences between styles: // 1) "\}" in basic style. template<typename _CharT> void _Scanner<_CharT>:: _M_scan_in_brace() { if (_M_current == _M_end) __throw_regex_error( regex_constants::error_brace, "Unexpected end of regex when in brace expression."); auto __c = *_M_current++; if (_M_ctype.is(_CtypeT::digit, __c)) { _M_token = _S_token_dup_count; _M_value.assign(1, __c); while (_M_current != _M_end && _M_ctype.is(_CtypeT::digit, *_M_current)) _M_value += *_M_current++; } else if (__c == ',') _M_token = _S_token_comma; // basic use \}. 
else if (_M_is_basic()) { if (__c == '\\' && _M_current != _M_end && *_M_current == '}') { _M_state = _S_state_normal; _M_token = _S_token_interval_end; ++_M_current; } else __throw_regex_error(regex_constants::error_badbrace, "Unexpected character in brace expression."); } else if (__c == '}') { _M_state = _S_state_normal; _M_token = _S_token_interval_end; } else __throw_regex_error(regex_constants::error_badbrace, "Unexpected character in brace expression."); } template<typename _CharT> void _Scanner<_CharT>:: _M_eat_escape_ecma() { if (_M_current == _M_end) __throw_regex_error(regex_constants::error_escape, "Unexpected end of regex when escaping."); auto __c = *_M_current++; auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket)) { _M_token = _S_token_ord_char; _M_value.assign(1, *__pos); } else if (__c == 'b') { _M_token = _S_token_word_bound; _M_value.assign(1, 'p'); } else if (__c == 'B') { _M_token = _S_token_word_bound; _M_value.assign(1, 'n'); } // N3376 28.13 else if (__c == 'd' || __c == 'D' || __c == 's' || __c == 'S' || __c == 'w' || __c == 'W') { _M_token = _S_token_quoted_class; _M_value.assign(1, __c); } else if (__c == 'c') { if (_M_current == _M_end) __throw_regex_error( regex_constants::error_escape, "Unexpected end of regex when reading control code."); _M_token = _S_token_ord_char; _M_value.assign(1, *_M_current++); } else if (__c == 'x' || __c == 'u') { _M_value.erase(); for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++) { if (_M_current == _M_end || !_M_ctype.is(_CtypeT::xdigit, *_M_current)) __throw_regex_error( regex_constants::error_escape, "Unexpected end of regex when ascii character."); _M_value += *_M_current++; } _M_token = _S_token_hex_num; } // ECMAScript recognizes multi-digit back-references. 
else if (_M_ctype.is(_CtypeT::digit, __c)) { _M_value.assign(1, __c); while (_M_current != _M_end && _M_ctype.is(_CtypeT::digit, *_M_current)) _M_value += *_M_current++; _M_token = _S_token_backref; } else { _M_token = _S_token_ord_char; _M_value.assign(1, __c); } } // Differences between styles: // 1) Extended doesn't support backref, but basic does. template<typename _CharT> void _Scanner<_CharT>:: _M_eat_escape_posix() { if (_M_current == _M_end) __throw_regex_error(regex_constants::error_escape, "Unexpected end of regex when escaping."); auto __c = *_M_current; auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); if (__pos != nullptr && *__pos != '\0') { _M_token = _S_token_ord_char; _M_value.assign(1, __c); } // We MUST judge awk before handling backrefs. There's no backref in awk. else if (_M_is_awk()) { _M_eat_escape_awk(); return; } else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0') { _M_token = _S_token_backref; _M_value.assign(1, __c); } else { #ifdef __STRICT_ANSI__ // POSIX says it is undefined to escape ordinary characters __throw_regex_error(regex_constants::error_escape, "Unexpected escape character."); #else _M_token = _S_token_ord_char; _M_value.assign(1, __c); #endif } ++_M_current; } template<typename _CharT> void _Scanner<_CharT>:: _M_eat_escape_awk() { auto __c = *_M_current++; auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); if (__pos != nullptr) { _M_token = _S_token_ord_char; _M_value.assign(1, *__pos); } // \ddd for oct representation else if (_M_ctype.is(_CtypeT::digit, __c) && __c != '8' && __c != '9') { _M_value.assign(1, __c); for (int __i = 0; __i < 2 && _M_current != _M_end && _M_ctype.is(_CtypeT::digit, *_M_current) && *_M_current != '8' && *_M_current != '9'; __i++) _M_value += *_M_current++; _M_token = _S_token_oct_num; return; } else __throw_regex_error(regex_constants::error_escape, "Unexpected escape character."); } // Eats a character class or throws an exception. 
// __ch could be ':', '.' or '=', _M_current is the char after ']' when // returning. template<typename _CharT> void _Scanner<_CharT>:: _M_eat_class(char __ch) { for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) _M_value += *_M_current++; if (_M_current == _M_end || *_M_current++ != __ch || _M_current == _M_end // skip __ch || *_M_current++ != ']') // skip ']' { if (__ch == ':') __throw_regex_error(regex_constants::error_ctype, "Unexpected end of character class."); else __throw_regex_error(regex_constants::error_collate, "Unexpected end of character class."); } } #ifdef _GLIBCXX_DEBUG template<typename _CharT> std::ostream& _Scanner<_CharT>:: _M_print(std::ostream& ostr) { switch (_M_token) { case _S_token_anychar: ostr << "any-character\n"; break; case _S_token_backref: ostr << "backref\n"; break; case _S_token_bracket_begin: ostr << "bracket-begin\n"; break; case _S_token_bracket_neg_begin: ostr << "bracket-neg-begin\n"; break; case _S_token_bracket_end: ostr << "bracket-end\n"; break; case _S_token_char_class_name: ostr << "char-class-name \"" << _M_value << "\"\n"; break; case _S_token_closure0: ostr << "closure0\n"; break; case _S_token_closure1: ostr << "closure1\n"; break; case _S_token_collsymbol: ostr << "collsymbol \"" << _M_value << "\"\n"; break; case _S_token_comma: ostr << "comma\n"; break; case _S_token_dup_count: ostr << "dup count: " << _M_value << "\n"; break; case _S_token_eof: ostr << "EOF\n"; break; case _S_token_equiv_class_name: ostr << "equiv-class-name \"" << _M_value << "\"\n"; break; case _S_token_interval_begin: ostr << "interval begin\n"; break; case _S_token_interval_end: ostr << "interval end\n"; break; case _S_token_line_begin: ostr << "line begin\n"; break; case _S_token_line_end: ostr << "line end\n"; break; case _S_token_opt: ostr << "opt\n"; break; case _S_token_or: ostr << "or\n"; break; case _S_token_ord_char: ostr << "ordinary character: \"" << _M_value << "\"\n"; break; case _S_token_subexpr_begin: ostr << 
"subexpr begin\n"; break; case _S_token_subexpr_no_group_begin: ostr << "no grouping subexpr begin\n"; break; case _S_token_subexpr_lookahead_begin: ostr << "lookahead subexpr begin\n"; break; case _S_token_subexpr_end: ostr << "subexpr end\n"; break; case _S_token_unknown: ostr << "-- unknown token --\n"; break; case _S_token_oct_num: ostr << "oct number " << _M_value << "\n"; break; case _S_token_hex_num: ostr << "hex number " << _M_value << "\n"; break; case _S_token_quoted_class: ostr << "quoted class " << "\\" << _M_value << "\n"; break; default: _GLIBCXX_DEBUG_ASSERT(false); } return ostr; } #endif _GLIBCXX_END_NAMESPACE_VERSION } // namespace __detail } // namespace
Name | Type | Size | Permission | Actions |
---|---|---|---|---|
algorithmfwd.h | File | 21.23 KB | 0644 |
|
alloc_traits.h | File | 19.13 KB | 0644 |
|
allocated_ptr.h | File | 3.43 KB | 0644 |
|
allocator.h | File | 6.73 KB | 0644 |
|
atomic_base.h | File | 23.28 KB | 0644 |
|
atomic_futex.h | File | 9.35 KB | 0644 |
|
atomic_lockfree_defines.h | File | 2.2 KB | 0644 |
|
basic_ios.h | File | 15.7 KB | 0644 |
|
basic_ios.tcc | File | 5.94 KB | 0644 |
|
basic_string.h | File | 235.15 KB | 0644 |
|
basic_string.tcc | File | 52.5 KB | 0644 |
|
boost_concept_check.h | File | 26.41 KB | 0644 |
|
c++0x_warning.h | File | 1.44 KB | 0644 |
|
char_traits.h | File | 20.26 KB | 0644 |
|
codecvt.h | File | 20.8 KB | 0644 |
|
concept_check.h | File | 3.34 KB | 0644 |
|
cpp_type_traits.h | File | 9.37 KB | 0644 |
|
cxxabi_forced.h | File | 1.77 KB | 0644 |
|
cxxabi_init_exception.h | File | 2.17 KB | 0644 |
|
deque.tcc | File | 33.26 KB | 0644 |
|
enable_special_members.h | File | 12.1 KB | 0644 |
|
exception.h | File | 2.23 KB | 0644 |
|
exception_defines.h | File | 1.61 KB | 0644 |
|
exception_ptr.h | File | 6.31 KB | 0644 |
|
forward_list.h | File | 47.5 KB | 0644 |
|
forward_list.tcc | File | 14.68 KB | 0644 |
|
fstream.tcc | File | 32.14 KB | 0644 |
|
functexcept.h | File | 3.18 KB | 0644 |
|
functional_hash.h | File | 8 KB | 0644 |
|
gslice.h | File | 5.39 KB | 0644 |
|
gslice_array.h | File | 7.59 KB | 0644 |
|
hash_bytes.h | File | 2.1 KB | 0644 |
|
hashtable.h | File | 71.56 KB | 0644 |
|
hashtable_policy.h | File | 65.47 KB | 0644 |
|
indirect_array.h | File | 7.68 KB | 0644 |
|
invoke.h | File | 3.57 KB | 0644 |
|
ios_base.h | File | 30.22 KB | 0644 |
|
istream.tcc | File | 30.36 KB | 0644 |
|
list.tcc | File | 15.54 KB | 0644 |
|
locale_classes.h | File | 24.31 KB | 0644 |
|
locale_classes.tcc | File | 8.18 KB | 0644 |
|
locale_conv.h | File | 15.7 KB | 0644 |
|
locale_facets.h | File | 89.98 KB | 0644 |
|
locale_facets.tcc | File | 38.64 KB | 0644 |
|
locale_facets_nonio.h | File | 67.33 KB | 0644 |
|
locale_facets_nonio.tcc | File | 44.46 KB | 0644 |
|
localefwd.h | File | 5.51 KB | 0644 |
|
mask_array.h | File | 7.42 KB | 0644 |
|
memoryfwd.h | File | 2.36 KB | 0644 |
|
move.h | File | 6.5 KB | 0644 |
|
nested_exception.h | File | 4.66 KB | 0644 |
|
node_handle.h | File | 7.97 KB | 0644 |
|
ostream.tcc | File | 12.03 KB | 0644 |
|
ostream_insert.h | File | 3.91 KB | 0644 |
|
parse_numbers.h | File | 7.84 KB | 0644 |
|
postypes.h | File | 8.02 KB | 0644 |
|
predefined_ops.h | File | 8.87 KB | 0644 |
|
ptr_traits.h | File | 4.75 KB | 0644 |
|
quoted_string.h | File | 4.47 KB | 0644 |
|
random.h | File | 171.19 KB | 0644 |
|
random.tcc | File | 103.08 KB | 0644 |
|
range_access.h | File | 9.75 KB | 0644 |
|
refwrap.h | File | 13.48 KB | 0644 |
|
regex.h | File | 95.9 KB | 0644 |
|
regex.tcc | File | 16.1 KB | 0644 |
|
regex_automaton.h | File | 10.47 KB | 0644 |
|
regex_automaton.tcc | File | 7.65 KB | 0644 |
|
regex_compiler.h | File | 16.79 KB | 0644 |
|
regex_compiler.tcc | File | 18.46 KB | 0644 |
|
regex_constants.h | File | 14.36 KB | 0644 |
|
regex_error.h | File | 4.85 KB | 0644 |
|
regex_executor.h | File | 7.31 KB | 0644 |
|
regex_executor.tcc | File | 16.91 KB | 0644 |
|
regex_scanner.h | File | 6.92 KB | 0644 |
|
regex_scanner.tcc | File | 14.66 KB | 0644 |
|
shared_ptr.h | File | 22.37 KB | 0644 |
|
shared_ptr_atomic.h | File | 9.54 KB | 0644 |
|
shared_ptr_base.h | File | 52.54 KB | 0644 |
|
slice_array.h | File | 9.13 KB | 0644 |
|
specfun.h | File | 44.03 KB | 0644 |
|
sstream.tcc | File | 9.88 KB | 0644 |
|
std_abs.h | File | 3.21 KB | 0644 |
|
std_function.h | File | 23.41 KB | 0644 |
|
std_mutex.h | File | 9.08 KB | 0644 |
|
stl_algo.h | File | 208.85 KB | 0644 |
|
stl_algobase.h | File | 49.22 KB | 0644 |
|
stl_bvector.h | File | 33.03 KB | 0644 |
|
stl_construct.h | File | 7.22 KB | 0644 |
|
stl_deque.h | File | 75.95 KB | 0644 |
|
stl_function.h | File | 32.5 KB | 0644 |
|
stl_heap.h | File | 19.73 KB | 0644 |
|
stl_iterator.h | File | 40.77 KB | 0644 |
|
stl_iterator_base_funcs.h | File | 7.88 KB | 0644 |
|
stl_iterator_base_types.h | File | 8.48 KB | 0644 |
|
stl_list.h | File | 63.65 KB | 0644 |
|
stl_map.h | File | 50.02 KB | 0644 |
|
stl_multimap.h | File | 39 KB | 0644 |
|
stl_multiset.h | File | 33.26 KB | 0644 |
|
stl_numeric.h | File | 13.51 KB | 0644 |
|
stl_pair.h | File | 18.21 KB | 0644 |
|
stl_queue.h | File | 21.61 KB | 0644 |
|
stl_raw_storage_iter.h | File | 3.74 KB | 0644 |
|
stl_relops.h | File | 4.49 KB | 0644 |
|
stl_set.h | File | 33.63 KB | 0644 |
|
stl_stack.h | File | 11.18 KB | 0644 |
|
stl_tempbuf.h | File | 8.15 KB | 0644 |
|
stl_tree.h | File | 73.07 KB | 0644 |
|
stl_uninitialized.h | File | 27 KB | 0644 |
|
stl_vector.h | File | 54.33 KB | 0644 |
|
stream_iterator.h | File | 6.5 KB | 0644 |
|
streambuf.tcc | File | 4.81 KB | 0644 |
|
streambuf_iterator.h | File | 12.33 KB | 0644 |
|
string_view.tcc | File | 6.48 KB | 0644 |
|
stringfwd.h | File | 2.55 KB | 0644 |
|
uniform_int_dist.h | File | 9.91 KB | 0644 |
|
unique_ptr.h | File | 24.88 KB | 0644 |
|
unordered_map.h | File | 67.04 KB | 0644 |
|
unordered_set.h | File | 52.56 KB | 0644 |
|
uses_allocator.h | File | 6.23 KB | 0644 |
|
valarray_after.h | File | 22.12 KB | 0644 |
|
valarray_array.h | File | 21.3 KB | 0644 |
|
valarray_array.tcc | File | 7.08 KB | 0644 |
|
valarray_before.h | File | 18.08 KB | 0644 |
|
vector.tcc | File | 26.95 KB | 0644 |
|