| 1 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 2 | |
| 3 | #include "parser.hpp" |
| 4 | |
| 5 | #include <iostream> |
| 6 | #include <memory> |
| 7 | #include <regex> |
| 8 | #include <stdexcept> |
| 9 | #include <string> |
| 10 | #include <vector> |
| 11 | |
| 12 | static std::vector<token> lex(const std::string &cmd) |
| 13 | { |
| 14 | if (cmd.empty()) |
| 15 | return {}; |
| 16 | |
| 17 | const char *data = cmd.data(); |
| 18 | const char *const end = data + cmd.size(); |
| 19 | std::vector<token> tokens; |
| 20 | std::string curr; |
| 21 | bool in_double_quotes = false; |
| 22 | |
| 23 | const auto flush_curr = [&]() |
| 24 | { |
| 25 | if (!curr.empty()) |
| 26 | tokens.push_back(x: token(curr)), curr.clear(); |
| 27 | }; |
| 28 | |
| 29 | for (; data < end; data++) |
| 30 | { |
| 31 | switch (*data) |
| 32 | { |
| 33 | case '\\': |
| 34 | { |
| 35 | ++data; |
| 36 | if (data == end) |
| 37 | throw std::runtime_error("unexpected end of command" ); |
| 38 | if (!in_double_quotes && *data != '\n') |
| 39 | curr += *data; |
| 40 | else |
| 41 | { |
| 42 | switch (*data) |
| 43 | { |
| 44 | case '\n': break; |
| 45 | case '$': curr += '$'; break; |
| 46 | case '`': curr += '`'; break; |
| 47 | case '"': curr += '"'; break; |
| 48 | case '\\': curr += '\\'; break; |
| 49 | default: curr += '\\', curr += *data; break; |
| 50 | } |
| 51 | } |
| 52 | break; |
| 53 | } |
| 54 | |
| 55 | case '\'': |
| 56 | { |
| 57 | if (in_double_quotes) |
| 58 | goto append_data; |
| 59 | |
| 60 | // read until next single quote |
| 61 | while (++data < end) |
| 62 | { |
| 63 | if (*data == '\'') |
| 64 | break; |
| 65 | curr += *data; |
| 66 | } |
| 67 | |
| 68 | if (data == end) |
| 69 | throw std::runtime_error("Unterminated single quote" ); |
| 70 | |
| 71 | break; |
| 72 | } |
| 73 | |
| 74 | case '"': |
| 75 | { |
| 76 | in_double_quotes = !in_double_quotes; |
| 77 | break; |
| 78 | } |
| 79 | |
| 80 | case '$': |
| 81 | { |
| 82 | // read until next token |
| 83 | std::string var = "$" ; |
| 84 | while (++data < end) |
| 85 | { |
| 86 | if (*data == ' ' || *data == '\t' || *data == '\n' || *data == '\r' || *data == '\'' || *data == '"' || *data == '`') |
| 87 | { |
| 88 | // possibly end the double or single quote |
| 89 | if (in_double_quotes && *data == '"') |
| 90 | in_double_quotes = false; |
| 91 | break; |
| 92 | } |
| 93 | var += *data; |
| 94 | } |
| 95 | |
| 96 | // match and replace the variable |
| 97 | const auto regex = std::regex("(\\$[a-zA-Z0-9_]+)" ); |
| 98 | |
| 99 | std::smatch match; |
| 100 | while (std::regex_search(s: var, m&: match, e: regex)) |
| 101 | { |
| 102 | const auto varname = match[1].str(); |
| 103 | const auto value = getenv(name: varname.c_str() + 1); // skip the $ |
| 104 | |
| 105 | if (value) |
| 106 | var = std::regex_replace(s: var, e: regex, fmt: value); |
| 107 | else |
| 108 | var = std::regex_replace(s: var, e: regex, fmt: "" ); |
| 109 | } |
| 110 | |
| 111 | curr += var; |
| 112 | break; |
| 113 | } |
| 114 | |
| 115 | /// A space or tab ends a token. |
| 116 | case ' ': |
| 117 | case '\t': |
| 118 | { |
| 119 | if (in_double_quotes) |
| 120 | goto append_data; |
| 121 | flush_curr(); |
| 122 | break; |
| 123 | } |
| 124 | |
| 125 | case '&': |
| 126 | { |
| 127 | if (in_double_quotes) |
| 128 | goto append_data; |
| 129 | flush_curr(); |
| 130 | tokens.push_back(x: token(BACKGROUND)); |
| 131 | break; |
| 132 | } |
| 133 | |
| 134 | case '|': |
| 135 | { |
| 136 | if (in_double_quotes) |
| 137 | goto append_data; |
| 138 | flush_curr(); |
| 139 | tokens.push_back(x: token(PIPE)); |
| 140 | break; |
| 141 | } |
| 142 | |
| 143 | case '#': |
| 144 | { |
| 145 | if (in_double_quotes) |
| 146 | goto append_data; |
| 147 | return tokens; |
| 148 | } |
| 149 | |
| 150 | case '<': |
| 151 | case '>': |
| 152 | { |
| 153 | if (in_double_quotes) |
| 154 | goto append_data; |
| 155 | flush_curr(); |
| 156 | tokens.push_back(x: token(*data == '<' ? REDIRECT_IN : REDIRECT_OUT)); |
| 157 | break; |
| 158 | } |
| 159 | |
| 160 | default: |
| 161 | { |
| 162 | append_data: |
| 163 | curr += *data; |
| 164 | break; |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | if (in_double_quotes) |
| 170 | throw std::runtime_error("Unterminated double quote" ); |
| 171 | if (!curr.empty()) |
| 172 | tokens.push_back(x: { curr }); |
| 173 | return tokens; |
| 174 | } |
| 175 | |
| 176 | // implement a LL(1) parser for the shell grammar |
| 177 | static std::unique_ptr<ProgramSpec> parse_program(std::vector<token> &tokens) |
| 178 | { |
| 179 | if (tokens.empty()) |
| 180 | return nullptr; |
| 181 | |
| 182 | auto program = std::make_unique<ProgramSpec>(); |
| 183 | program->argv.push_back(x: tokens[0].tstring); |
| 184 | |
| 185 | tokens.erase(position: tokens.begin()); |
| 186 | |
| 187 | while (!tokens.empty()) |
| 188 | { |
| 189 | const auto current_token = tokens[0]; |
| 190 | |
| 191 | tokens.erase(position: tokens.begin()); |
| 192 | |
| 193 | // implement a left-recursive descent lookahead 1 parser |
| 194 | switch (current_token.type) |
| 195 | { |
| 196 | case REDIRECT_IN: |
| 197 | case REDIRECT_OUT: |
| 198 | case REDIRECT_APPEND: |
| 199 | { |
| 200 | if (tokens.empty()) |
| 201 | throw std::runtime_error("Expected a filename after redirection" ); |
| 202 | |
| 203 | const auto filename = tokens[0].tstring; |
| 204 | tokens.erase(position: tokens.begin()); |
| 205 | |
| 206 | // 'program 2>&1' is not supported |
| 207 | const auto fd = current_token.type == REDIRECT_IN ? 0 : 1; |
| 208 | const auto mode = current_token.type == REDIRECT_IN ? BaseRedirection::IOMode::ReadOnly : BaseRedirection::IOMode::WriteOnly; |
| 209 | const auto append = current_token.type == REDIRECT_APPEND; |
| 210 | |
| 211 | program->redirections[fd] = std::make_unique<FileRedirection>(args: filename, args: mode, args: append); |
| 212 | break; |
| 213 | } |
| 214 | |
| 215 | case PIPE: |
| 216 | { |
| 217 | std::cout << "PIPE isn't supported yet" << std::endl; |
| 218 | break; |
| 219 | } |
| 220 | |
| 221 | case BACKGROUND: |
| 222 | { |
| 223 | program->background = true; |
| 224 | break; |
| 225 | } |
| 226 | |
| 227 | case END: |
| 228 | { |
| 229 | return program; |
| 230 | } |
| 231 | |
| 232 | default: |
| 233 | { |
| 234 | program->argv.push_back(x: current_token.tstring); |
| 235 | break; |
| 236 | } |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | return program; |
| 241 | } |
| 242 | |
| 243 | std::unique_ptr<ProgramSpec> parse_commandline(const std::string &command) |
| 244 | { |
| 245 | try |
| 246 | { |
| 247 | auto tokens = lex(cmd: command); |
| 248 | return parse_program(tokens); |
| 249 | } |
| 250 | catch (const std::exception &e) |
| 251 | { |
| 252 | std::cerr << "shlex: " << e.what() << std::endl; |
| 253 | return {}; |
| 254 | } |
| 255 | } |
| 256 | |