1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2024 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex_executor.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31// FIXME convert comments to doxygen format.
32
33namespace std _GLIBCXX_VISIBILITY(default)
34{
35_GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37namespace __detail
38{
39 /**
40 * @addtogroup regex-detail
41 * @{
42 */
43
44 /**
45 * @brief Takes a regex and an input string and does the matching.
46 *
47 * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48 * by the template parameter %__dfs_mode.
49 */
50 template<typename _BiIter, typename _Alloc, typename _TraitsT,
51 bool __dfs_mode>
52 class _Executor
53 {
54 using __search_mode = integral_constant<bool, __dfs_mode>;
55 using __dfs = true_type;
56 using __bfs = false_type;
57
58 enum class _Match_mode : unsigned char { _Exact, _Prefix };
59
60 public:
61 typedef typename iterator_traits<_BiIter>::value_type _CharT;
62 typedef basic_regex<_CharT, _TraitsT> _RegexT;
63 typedef _GLIBCXX_STD_C::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
64 typedef regex_constants::match_flag_type _FlagT;
65 typedef typename _TraitsT::char_class_type _ClassT;
66 typedef _NFA<_TraitsT> _NFAT;
67
68 public:
69 _Executor(_BiIter __begin,
70 _BiIter __end,
71 _ResultsVec& __results,
72 const _RegexT& __re,
73 _FlagT __flags)
74 : _M_cur_results(__results.get_allocator()),
75 _M_begin(__begin),
76 _M_end(__end),
77 _M_re(__re),
78 _M_nfa(*__re._M_automaton),
79 _M_results(__results),
80 _M_rep_count(_M_nfa.size()),
81 _M_states(_M_nfa._M_start(), _M_nfa.size()),
82 _M_flags(__flags)
83 {
84 using namespace regex_constants;
85 if (__flags & match_prev_avail) // ignore not_bol and not_bow
86 _M_flags &= ~(match_not_bol | match_not_bow);
87 }
88
89 // Set matched when string exactly matches the pattern.
90 bool
91 _M_match()
92 {
93 _M_current = _M_begin;
94 return _M_main(match_mode: _Match_mode::_Exact);
95 }
96
97 // Set matched when some prefix of the string matches the pattern.
98 bool
99 _M_search_from_first()
100 {
101 _M_current = _M_begin;
102 return _M_main(match_mode: _Match_mode::_Prefix);
103 }
104
105 bool
106 _M_search();
107
108 private:
109 void
110 _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
111
112 void
113 _M_handle_repeat(_Match_mode, _StateIdT);
114
115 void
116 _M_handle_subexpr_begin(_Match_mode, _StateIdT);
117
118 void
119 _M_handle_subexpr_end(_Match_mode, _StateIdT);
120
121 void
122 _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
123
124 void
125 _M_handle_line_end_assertion(_Match_mode, _StateIdT);
126
127 void
128 _M_handle_word_boundary(_Match_mode, _StateIdT);
129
130 void
131 _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
132
133 void
134 _M_handle_match(_Match_mode, _StateIdT);
135
136 void
137 _M_handle_backref(_Match_mode, _StateIdT);
138
139 void
140 _M_handle_accept(_Match_mode, _StateIdT);
141
142 void
143 _M_handle_alternative(_Match_mode, _StateIdT);
144
145 void
146 _M_dfs(_Match_mode __match_mode, _StateIdT __start);
147
148 bool
149 _M_main(_Match_mode __match_mode)
150 { return _M_main_dispatch(__match_mode, __search_mode{}); }
151
152 bool
153 _M_main_dispatch(_Match_mode __match_mode, __dfs);
154
155 bool
156 _M_main_dispatch(_Match_mode __match_mode, __bfs);
157
158 bool
159 _M_is_word(_CharT __ch) const
160 {
161 static const _CharT __s[2] = { 'w' };
162 return _M_re._M_automaton->_M_traits.isctype
163 (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
164 }
165
166 bool
167 _M_at_begin() const
168 {
169 if (_M_current == _M_begin)
170 {
171 // match_not_bol means ^ does not match [_M_begin,_M_begin)
172 if (_M_flags & regex_constants::match_not_bol)
173 return false;
174 // match_prev_avail means _M_begin is not the start of the input.
175 if (_M_flags & regex_constants::match_prev_avail)
176 {
177 // For ECMAScript multiline matches, check if the previous
178 // character is a line terminator.
179 if (_M_match_multiline())
180 return _M_is_line_terminator(c: *std::prev(_M_current));
181 else
182 return false;
183 }
184 else // ^ matches at _M_begin
185 return true;
186 }
187 else if (_M_match_multiline())
188 return _M_is_line_terminator(c: *std::prev(_M_current));
189 else
190 return false;
191 }
192
193 bool
194 _M_at_end() const
195 {
196 if (_M_current == _M_end)
197 return !(_M_flags & regex_constants::match_not_eol);
198 else if (_M_match_multiline())
199 return _M_is_line_terminator(c: *_M_current);
200 else
201 return false;
202 }
203
204 bool
205 _M_word_boundary() const;
206
207 bool
208 _M_lookahead(_StateIdT __next);
209
210 bool
211 _M_is_line_terminator(_CharT __c) const
212 {
213 const auto& __traits = _M_re._M_automaton->_M_traits;
214 const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
215 const char __n{ __ct.narrow(__c, ' ') };
216 if (__n == '\n')
217 return true;
218 if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
219 {
220 if (__n == '\r')
221 return true;
222 // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
223 }
224 return false;
225 }
226
227 bool
228 _M_match_multiline() const noexcept
229 {
230 constexpr auto __m
231 = regex_constants::ECMAScript | regex_constants::__multiline;
232 return (_M_re._M_automaton->_M_options() & __m) == __m;
233 }
234
235 // Holds additional information used in BFS-mode.
236 template<typename _SearchMode, typename _ResultsVec>
237 struct _State_info;
238
239 template<typename _ResultsVec>
240 struct _State_info<__bfs, _ResultsVec>
241 {
242 explicit
243 _State_info(_StateIdT __start, size_t __n)
244 : _M_visited_states(new bool[__n]()), _M_start(__start)
245 { }
246
247 ~_State_info() { delete[] _M_visited_states; }
248
249 _State_info(const _State_info&) = delete;
250 _State_info& operator=(const _State_info&) = delete;
251
252 bool _M_visited(_StateIdT __i)
253 {
254 if (_M_visited_states[__i])
255 return true;
256 _M_visited_states[__i] = true;
257 return false;
258 }
259
260 void _M_queue(_StateIdT __i, const _ResultsVec& __res)
261 { _M_match_queue.emplace_back(__i, __res); }
262
263 // Dummy implementations for BFS mode.
264 _BiIter* _M_get_sol_pos() { return nullptr; }
265
266 // Saves states that need to be considered for the next character.
267 _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
268 // Indicates which states are already visited.
269 bool* _M_visited_states;
270 // To record current solution.
271 _StateIdT _M_start;
272 };
273
274 template<typename _ResultsVec>
275 struct _State_info<__dfs, _ResultsVec>
276 {
277 explicit
278 _State_info(_StateIdT __start, size_t) : _M_start(__start)
279 { }
280
281 // Dummy implementations for DFS mode.
282 bool _M_visited(_StateIdT) const { return false; }
283 void _M_queue(_StateIdT, const _ResultsVec&) { }
284
285 _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
286
287 // To record current solution.
288 _StateIdT _M_start;
289 _BiIter _M_sol_pos;
290 };
291
292 public:
293 _ResultsVec _M_cur_results;
294 _BiIter _M_current;
295 _BiIter _M_begin;
296 const _BiIter _M_end;
297 const _RegexT& _M_re;
298 const _NFAT& _M_nfa;
299 _ResultsVec& _M_results;
300 _GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
301 _State_info<__search_mode, _ResultsVec> _M_states;
302 _FlagT _M_flags;
303 // Do we have a solution so far?
304 bool _M_has_sol;
305 };
306
307 ///@} regex-detail
308} // namespace __detail
309_GLIBCXX_END_NAMESPACE_VERSION
310} // namespace std
311
312#include <bits/regex_executor.tcc>
313