1// SPDX-License-Identifier: GPL-3.0-or-later
2
3#include "parser.hpp"
4
5#include <iostream>
6#include <memory>
7#include <regex>
8#include <stdexcept>
9#include <string>
10#include <vector>
11
12static std::vector<token> lex(const std::string &cmd)
13{
14 if (cmd.empty())
15 return {};
16
17 const char *data = cmd.data();
18 const char *const end = data + cmd.size();
19 std::vector<token> tokens;
20 std::string curr;
21 bool in_double_quotes = false;
22
23 const auto flush_curr = [&]()
24 {
25 if (!curr.empty())
26 tokens.push_back(x: token(curr)), curr.clear();
27 };
28
29 for (; data < end; data++)
30 {
31 switch (*data)
32 {
33 case '\\':
34 {
35 ++data;
36 if (data == end)
37 throw std::runtime_error("unexpected end of command");
38 if (!in_double_quotes && *data != '\n')
39 curr += *data;
40 else
41 {
42 switch (*data)
43 {
44 case '\n': break;
45 case '$': curr += '$'; break;
46 case '`': curr += '`'; break;
47 case '"': curr += '"'; break;
48 case '\\': curr += '\\'; break;
49 default: curr += '\\', curr += *data; break;
50 }
51 }
52 break;
53 }
54
55 case '\'':
56 {
57 if (in_double_quotes)
58 goto append_data;
59
60 // read until next single quote
61 while (++data < end)
62 {
63 if (*data == '\'')
64 break;
65 curr += *data;
66 }
67
68 if (data == end)
69 throw std::runtime_error("Unterminated single quote");
70
71 break;
72 }
73
74 case '"':
75 {
76 in_double_quotes = !in_double_quotes;
77 break;
78 }
79
80 case '$':
81 {
82 // read until next token
83 std::string var = "$";
84 while (++data < end)
85 {
86 if (*data == ' ' || *data == '\t' || *data == '\n' || *data == '\r' || *data == '\'' || *data == '"' || *data == '`')
87 {
88 // possibly end the double or single quote
89 if (in_double_quotes && *data == '"')
90 in_double_quotes = false;
91 break;
92 }
93 var += *data;
94 }
95
96 // match and replace the variable
97 const auto regex = std::regex("(\\$[a-zA-Z0-9_]+)");
98
99 std::smatch match;
100 while (std::regex_search(s: var, m&: match, e: regex))
101 {
102 const auto varname = match[1].str();
103 const auto value = getenv(name: varname.c_str() + 1); // skip the $
104
105 if (value)
106 var = std::regex_replace(s: var, e: regex, fmt: value);
107 else
108 var = std::regex_replace(s: var, e: regex, fmt: "");
109 }
110
111 curr += var;
112 break;
113 }
114
115 /// A space or tab ends a token.
116 case ' ':
117 case '\t':
118 {
119 if (in_double_quotes)
120 goto append_data;
121 flush_curr();
122 break;
123 }
124
125 case '&':
126 {
127 if (in_double_quotes)
128 goto append_data;
129 flush_curr();
130 tokens.push_back(x: token(BACKGROUND));
131 break;
132 }
133
134 case '|':
135 {
136 if (in_double_quotes)
137 goto append_data;
138 flush_curr();
139 tokens.push_back(x: token(PIPE));
140 break;
141 }
142
143 case '#':
144 {
145 if (in_double_quotes)
146 goto append_data;
147 return tokens;
148 }
149
150 case '<':
151 case '>':
152 {
153 if (in_double_quotes)
154 goto append_data;
155 flush_curr();
156 tokens.push_back(x: token(*data == '<' ? REDIRECT_IN : REDIRECT_OUT));
157 break;
158 }
159
160 default:
161 {
162 append_data:
163 curr += *data;
164 break;
165 }
166 }
167 }
168
169 if (in_double_quotes)
170 throw std::runtime_error("Unterminated double quote");
171 if (!curr.empty())
172 tokens.push_back(x: { curr });
173 return tokens;
174}
175
176// implement a LL(1) parser for the shell grammar
177static std::unique_ptr<ProgramSpec> parse_program(std::vector<token> &tokens)
178{
179 if (tokens.empty())
180 return nullptr;
181
182 auto program = std::make_unique<ProgramSpec>();
183 program->argv.push_back(x: tokens[0].tstring);
184
185 tokens.erase(position: tokens.begin());
186
187 while (!tokens.empty())
188 {
189 const auto current_token = tokens[0];
190
191 tokens.erase(position: tokens.begin());
192
193 // implement a left-recursive descent lookahead 1 parser
194 switch (current_token.type)
195 {
196 case REDIRECT_IN:
197 case REDIRECT_OUT:
198 case REDIRECT_APPEND:
199 {
200 if (tokens.empty())
201 throw std::runtime_error("Expected a filename after redirection");
202
203 const auto filename = tokens[0].tstring;
204 tokens.erase(position: tokens.begin());
205
206 // 'program 2>&1' is not supported
207 const auto fd = current_token.type == REDIRECT_IN ? 0 : 1;
208 const auto mode = current_token.type == REDIRECT_IN ? BaseRedirection::IOMode::ReadOnly : BaseRedirection::IOMode::WriteOnly;
209 const auto append = current_token.type == REDIRECT_APPEND;
210
211 program->redirections[fd] = std::make_unique<FileRedirection>(args: filename, args: mode, args: append);
212 break;
213 }
214
215 case PIPE:
216 {
217 std::cout << "PIPE isn't supported yet" << std::endl;
218 break;
219 }
220
221 case BACKGROUND:
222 {
223 program->background = true;
224 break;
225 }
226
227 case END:
228 {
229 return program;
230 }
231
232 default:
233 {
234 program->argv.push_back(x: current_token.tstring);
235 break;
236 }
237 }
238 }
239
240 return program;
241}
242
243std::unique_ptr<ProgramSpec> parse_commandline(const std::string &command)
244{
245 try
246 {
247 auto tokens = lex(cmd: command);
248 return parse_program(tokens);
249 }
250 catch (const std::exception &e)
251 {
252 std::cerr << "shlex: " << e.what() << std::endl;
253 return {};
254 }
255}
256