1 | // SPDX-License-Identifier: GPL-3.0-or-later |
2 | |
3 | #include "parser.hpp" |
4 | |
5 | #include <iostream> |
6 | #include <memory> |
7 | #include <regex> |
8 | #include <stdexcept> |
9 | #include <string> |
10 | #include <vector> |
11 | |
12 | static std::vector<token> lex(const std::string &cmd) |
13 | { |
14 | if (cmd.empty()) |
15 | return {}; |
16 | |
17 | const char *data = cmd.data(); |
18 | const char *const end = data + cmd.size(); |
19 | std::vector<token> tokens; |
20 | std::string curr; |
21 | bool in_double_quotes = false; |
22 | |
23 | const auto flush_curr = [&]() |
24 | { |
25 | if (!curr.empty()) |
26 | tokens.push_back(x: token(curr)), curr.clear(); |
27 | }; |
28 | |
29 | for (; data < end; data++) |
30 | { |
31 | switch (*data) |
32 | { |
33 | case '\\': |
34 | { |
35 | ++data; |
36 | if (data == end) |
37 | throw std::runtime_error("unexpected end of command" ); |
38 | if (!in_double_quotes && *data != '\n') |
39 | curr += *data; |
40 | else |
41 | { |
42 | switch (*data) |
43 | { |
44 | case '\n': break; |
45 | case '$': curr += '$'; break; |
46 | case '`': curr += '`'; break; |
47 | case '"': curr += '"'; break; |
48 | case '\\': curr += '\\'; break; |
49 | default: curr += '\\', curr += *data; break; |
50 | } |
51 | } |
52 | break; |
53 | } |
54 | |
55 | case '\'': |
56 | { |
57 | if (in_double_quotes) |
58 | goto append_data; |
59 | |
60 | // read until next single quote |
61 | while (++data < end) |
62 | { |
63 | if (*data == '\'') |
64 | break; |
65 | curr += *data; |
66 | } |
67 | |
68 | if (data == end) |
69 | throw std::runtime_error("Unterminated single quote" ); |
70 | |
71 | break; |
72 | } |
73 | |
74 | case '"': |
75 | { |
76 | in_double_quotes = !in_double_quotes; |
77 | break; |
78 | } |
79 | |
80 | case '$': |
81 | { |
82 | // read until next token |
83 | std::string var = "$" ; |
84 | while (++data < end) |
85 | { |
86 | if (*data == ' ' || *data == '\t' || *data == '\n' || *data == '\r' || *data == '\'' || *data == '"' || *data == '`') |
87 | { |
88 | // possibly end the double or single quote |
89 | if (in_double_quotes && *data == '"') |
90 | in_double_quotes = false; |
91 | break; |
92 | } |
93 | var += *data; |
94 | } |
95 | |
96 | // match and replace the variable |
97 | const auto regex = std::regex("(\\$[a-zA-Z0-9_]+)" ); |
98 | |
99 | std::smatch match; |
100 | while (std::regex_search(s: var, m&: match, e: regex)) |
101 | { |
102 | const auto varname = match[1].str(); |
103 | const auto value = getenv(name: varname.c_str() + 1); // skip the $ |
104 | |
105 | if (value) |
106 | var = std::regex_replace(s: var, e: regex, fmt: value); |
107 | else |
108 | var = std::regex_replace(s: var, e: regex, fmt: "" ); |
109 | } |
110 | |
111 | curr += var; |
112 | break; |
113 | } |
114 | |
115 | /// A space or tab ends a token. |
116 | case ' ': |
117 | case '\t': |
118 | { |
119 | if (in_double_quotes) |
120 | goto append_data; |
121 | flush_curr(); |
122 | break; |
123 | } |
124 | |
125 | case '&': |
126 | { |
127 | if (in_double_quotes) |
128 | goto append_data; |
129 | flush_curr(); |
130 | tokens.push_back(x: token(BACKGROUND)); |
131 | break; |
132 | } |
133 | |
134 | case '|': |
135 | { |
136 | if (in_double_quotes) |
137 | goto append_data; |
138 | flush_curr(); |
139 | tokens.push_back(x: token(PIPE)); |
140 | break; |
141 | } |
142 | |
143 | case '#': |
144 | { |
145 | if (in_double_quotes) |
146 | goto append_data; |
147 | return tokens; |
148 | } |
149 | |
150 | case '<': |
151 | case '>': |
152 | { |
153 | if (in_double_quotes) |
154 | goto append_data; |
155 | flush_curr(); |
156 | tokens.push_back(x: token(*data == '<' ? REDIRECT_IN : REDIRECT_OUT)); |
157 | break; |
158 | } |
159 | |
160 | default: |
161 | { |
162 | append_data: |
163 | curr += *data; |
164 | break; |
165 | } |
166 | } |
167 | } |
168 | |
169 | if (in_double_quotes) |
170 | throw std::runtime_error("Unterminated double quote" ); |
171 | if (!curr.empty()) |
172 | tokens.push_back(x: { curr }); |
173 | return tokens; |
174 | } |
175 | |
176 | // implement a LL(1) parser for the shell grammar |
177 | static std::unique_ptr<ProgramSpec> parse_program(std::vector<token> &tokens) |
178 | { |
179 | if (tokens.empty()) |
180 | return nullptr; |
181 | |
182 | auto program = std::make_unique<ProgramSpec>(); |
183 | program->argv.push_back(x: tokens[0].tstring); |
184 | |
185 | tokens.erase(position: tokens.begin()); |
186 | |
187 | while (!tokens.empty()) |
188 | { |
189 | const auto current_token = tokens[0]; |
190 | |
191 | tokens.erase(position: tokens.begin()); |
192 | |
193 | // implement a left-recursive descent lookahead 1 parser |
194 | switch (current_token.type) |
195 | { |
196 | case REDIRECT_IN: |
197 | case REDIRECT_OUT: |
198 | case REDIRECT_APPEND: |
199 | { |
200 | if (tokens.empty()) |
201 | throw std::runtime_error("Expected a filename after redirection" ); |
202 | |
203 | const auto filename = tokens[0].tstring; |
204 | tokens.erase(position: tokens.begin()); |
205 | |
206 | // 'program 2>&1' is not supported |
207 | const auto fd = current_token.type == REDIRECT_IN ? 0 : 1; |
208 | const auto mode = current_token.type == REDIRECT_IN ? BaseRedirection::IOMode::ReadOnly : BaseRedirection::IOMode::WriteOnly; |
209 | const auto append = current_token.type == REDIRECT_APPEND; |
210 | |
211 | program->redirections[fd] = std::make_unique<FileRedirection>(args: filename, args: mode, args: append); |
212 | break; |
213 | } |
214 | |
215 | case PIPE: |
216 | { |
217 | std::cout << "PIPE isn't supported yet" << std::endl; |
218 | break; |
219 | } |
220 | |
221 | case BACKGROUND: |
222 | { |
223 | program->background = true; |
224 | break; |
225 | } |
226 | |
227 | case END: |
228 | { |
229 | return program; |
230 | } |
231 | |
232 | default: |
233 | { |
234 | program->argv.push_back(x: current_token.tstring); |
235 | break; |
236 | } |
237 | } |
238 | } |
239 | |
240 | return program; |
241 | } |
242 | |
243 | std::unique_ptr<ProgramSpec> parse_commandline(const std::string &command) |
244 | { |
245 | try |
246 | { |
247 | auto tokens = lex(cmd: command); |
248 | return parse_program(tokens); |
249 | } |
250 | catch (const std::exception &e) |
251 | { |
252 | std::cerr << "shlex: " << e.what() << std::endl; |
253 | return {}; |
254 | } |
255 | } |
256 | |