| 1 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 2 | |
| 3 | #include "mos/tasks/elf.hpp" |
| 4 | |
| 5 | #include "mos/filesystem/vfs.hpp" |
| 6 | #include "mos/mm/mm.hpp" |
| 7 | #include "mos/mm/mmap.hpp" |
| 8 | #include "mos/platform/platform.hpp" |
| 9 | #include "mos/tasks/process.hpp" |
| 10 | #include "mos/tasks/schedule.hpp" |
| 11 | #include "mos/tasks/task_types.hpp" |
| 12 | #include "mos/tasks/thread.hpp" |
| 13 | |
| 14 | #include <elf.h> |
| 15 | #include <mos/types.hpp> |
| 16 | #include <mos/vector.hpp> |
| 17 | #include <mos_stdlib.hpp> |
| 18 | #include <mos_string.hpp> |
| 19 | |
| 20 | MOS_STATIC_ASSERT(sizeof(elf_header_t) == 0x40, "elf_header has wrong size" ); |
| 21 | MOS_STATIC_ASSERT(sizeof(elf_program_hdr_t) == 0x38, "elf_program_header has wrong size" ); |
| 22 | |
| 23 | static bool (const elf_header_t *) |
| 24 | { |
| 25 | if (header->identity.magic[0] != ELFMAG0) |
| 26 | return false; |
| 27 | |
| 28 | if (strncmp(str1: &header->identity.magic[1], str2: "ELF" , n: 3) != 0) |
| 29 | return false; |
| 30 | |
| 31 | if (header->identity.bits != ELFCLASS64) |
| 32 | return false; |
| 33 | |
| 34 | if (header->identity.endianness != ELF_ENDIANNESS_MOS_DEFAULT) |
| 35 | return false; |
| 36 | |
| 37 | if (header->identity.osabi != 0) |
| 38 | return false; |
| 39 | |
| 40 | if (header->identity.version != EV_CURRENT) |
| 41 | return false; |
| 42 | |
| 43 | if (header->machine_type != MOS_ELF_PLATFORM) |
| 44 | return false; |
| 45 | |
| 46 | return true; |
| 47 | } |
| 48 | |
| 49 | [[nodiscard]] static bool elf_read_file(FsBaseFile *file, void *buf, off_t offset, size_t size) |
| 50 | { |
| 51 | const size_t read = file->pread(buf, count: size, offset); |
| 52 | return read == size; |
| 53 | } |
| 54 | |
| 55 | static ptr_t (elf_header_t *elf) |
| 56 | { |
| 57 | MOS_UNUSED(elf); |
| 58 | return 0x4000000; // TODO: randomize |
| 59 | } |
| 60 | |
/**
 * Typical Stack Layout:
 *
 * (low address, 16-byte aligned)
 *      |-> uintn argc
 *      |-> ptr_t argv[]
 *      |   |-> NULL
 *      |-> ptr_t envp[]
 *      |   |-> NULL
 *      |-> AuxV
 *      |   |-> AT_...
 *      |   |-> AT_NULL
 *      |-> argv strings, NULL-terminated
 *      |-> environment strings, NULL-terminated
 *      |-> invocation string (referenced by AT_EXECFN), NULL-terminated
 *      |-> uintn zero
 *      |-> alignment padding (0 to 15 bytes)
 * (high address, end of stack)
 */
| 78 | |
| 79 | static void elf_setup_main_thread(Thread *thread, elf_startup_info_t *const info, ptr_t *const out_pargv, ptr_t *const out_penvp) |
| 80 | { |
| 81 | dInfo2<elf> << "cpu " << current_cpu->id << ": setting up a new main thread " << thread << " of process " << thread->owner; |
| 82 | |
| 83 | MOS_ASSERT_X(thread->u_stack.head == thread->u_stack.top, "thread %pt's user stack is not empty" , thread); |
| 84 | stack_push_val(&thread->u_stack, (uintn) 0); |
| 85 | |
| 86 | const void *stack_envp[info->envp.size() + 1]; // +1 for the null terminator |
| 87 | const void *stack_argv[info->argv.size() + 1]; // +1 for the null terminator |
| 88 | |
| 89 | // calculate the size of entire stack usage |
| 90 | size_t stack_size = 0; |
| 91 | stack_size += sizeof(uintn); // the topmost zero |
| 92 | stack_size += info->invocation.size() + 1; // +1 for the null terminator |
| 93 | |
| 94 | for (const auto &env : info->envp) |
| 95 | stack_size += env.size() + 1; // +1 for the null terminator |
| 96 | |
| 97 | for (const auto &arg : info->argv) |
| 98 | stack_size += arg.size() + 1; // +1 for the null terminator |
| 99 | |
| 100 | stack_size += sizeof(Elf64_auxv_t) * (info->auxv.size() + 2); // AT_EXECFN and AT_NULL |
| 101 | stack_size += sizeof(stack_envp); // envp |
| 102 | stack_size += sizeof(stack_argv); // argv |
| 103 | stack_size += sizeof(uintn); // argc |
| 104 | |
| 105 | // align to 16 bytes |
| 106 | const size_t aligned_stack_size = ALIGN_UP(stack_size, 16); |
| 107 | thread->u_stack.head = thread->u_stack.top - (aligned_stack_size - stack_size); // so that the stack can be aligned to 16 bytes |
| 108 | |
| 109 | stack_push_val(&thread->u_stack, (uintn) 0); |
| 110 | |
| 111 | void *invocation_ptr = stack_push(stack: &thread->u_stack, data: info->invocation.data(), size: info->invocation.size() + 1); // +1 for the null terminator |
| 112 | |
| 113 | info->AddAuxvEntry(AT_EXECFN, val: (ptr_t) invocation_ptr); |
| 114 | info->AddAuxvEntry(AT_NULL, val: 0); |
| 115 | |
| 116 | // ! copy the environment to the stack in reverse order ! |
| 117 | if (info->envp.empty()) |
| 118 | goto no_envp; |
| 119 | |
| 120 | for (int i = info->envp.size() - 1; i >= 0; i--) |
| 121 | { |
| 122 | const size_t len = info->envp[i].size() + 1; // +1 for the null terminator |
| 123 | stack_envp[i] = stack_push(stack: &thread->u_stack, data: info->envp[i].c_str(), size: len); |
| 124 | } |
| 125 | |
| 126 | no_envp: |
| 127 | stack_envp[info->envp.size()] = NULL; |
| 128 | |
| 129 | // ! copy the argv to the stack in reverse order ! |
| 130 | if (info->argv.empty()) |
| 131 | goto no_argv; |
| 132 | |
| 133 | for (int i = info->argv.size() - 1; i >= 0; i--) |
| 134 | { |
| 135 | const size_t len = info->argv[i].size() + 1; // +1 for the null terminator |
| 136 | stack_argv[i] = stack_push(stack: &thread->u_stack, data: info->argv[i].c_str(), size: len); |
| 137 | } |
| 138 | |
| 139 | no_argv: |
| 140 | stack_argv[info->argv.size()] = NULL; |
| 141 | |
| 142 | stack_push(stack: &thread->u_stack, Self&: data: info->auxv.data(), size: sizeof(Elf64_auxv_t) * info->auxv.size()); // auxv |
| 143 | *out_penvp = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_envp, size: sizeof(char *) * (info->envp.size() + 1)); // envp |
| 144 | *out_pargv = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_argv, size: sizeof(char *) * (info->argv.size() + 1)); // argv |
| 145 | stack_push_val(&thread->u_stack, (uintn) info->argv.size()); // argc |
| 146 | MOS_ASSERT(thread->u_stack.head % 16 == 0); |
| 147 | } |
| 148 | |
| 149 | static void elf_map_segment(const elf_program_hdr_t *const ph, ptr_t map_bias, MMContext *mm, FsBaseFile *file) |
| 150 | { |
| 151 | MOS_ASSERT(ph->header_type == ELF_PT_LOAD); |
| 152 | dInfo2<elf> << "program header " // |
| 153 | << (ph->flags() & ELF_PF_R ? 'r' : '-') // |
| 154 | << (ph->flags() & ELF_PF_W ? 'w' : '-') // |
| 155 | << (ph->flags() & ELF_PF_X ? 'x' : '-') // |
| 156 | << ", type '" << ph->header_type << "' at " << ph->vaddr; |
| 157 | |
| 158 | MOS_ASSERT(ph->data_offset % MOS_PAGE_SIZE == ph->vaddr % MOS_PAGE_SIZE); // offset ≡ vaddr (mod page size) |
| 159 | MOS_ASSERT_X(ph->size_in_file <= ph->size_in_mem, "invalid ELF: size in file is larger than size in memory" ); |
| 160 | |
| 161 | const VMFlags flags = [pflags = ph->flags()]() |
| 162 | { |
| 163 | VMFlags f = VM_USER; |
| 164 | if (pflags & ELF_PF_R) |
| 165 | f |= VM_READ; |
| 166 | if (pflags & ELF_PF_W) |
| 167 | f |= VM_WRITE; |
| 168 | if (pflags & ELF_PF_X) |
| 169 | f |= VM_EXEC; |
| 170 | return f; |
| 171 | }(); |
| 172 | |
| 173 | const ptr_t aligned_vaddr = ALIGN_DOWN_TO_PAGE(ph->vaddr); |
| 174 | const size_t npages = (ALIGN_UP_TO_PAGE(ph->vaddr + ph->size_in_mem) - aligned_vaddr) / MOS_PAGE_SIZE; |
| 175 | const size_t aligned_size = ALIGN_DOWN_TO_PAGE(ph->data_offset); |
| 176 | |
| 177 | const ptr_t map_start = map_bias + aligned_vaddr; |
| 178 | dInfo2<elf> << " mapping " << npages << " pages at " << map_start << " (bias at " << map_bias << ") from offset " << aligned_size << "..." ; |
| 179 | |
| 180 | const ptr_t vaddr = mmap_file(ctx: mm, hint_addr: map_start, flags: MMAP_PRIVATE | MMAP_EXACT, VMFlags: flags, n_pages: npages, io: file, offset: aligned_size); |
| 181 | MOS_ASSERT_X(vaddr == map_start, "failed to map ELF segment at " PTR_FMT, aligned_vaddr); |
| 182 | |
| 183 | if (ph->size_in_file < ph->size_in_mem) |
| 184 | { |
| 185 | dInfo2<elf> << " ... and zeroing " << (ph->size_in_mem - ph->size_in_file) << " bytes at " << (map_bias + ph->vaddr + ph->size_in_file); |
| 186 | memzero(s: (char *) map_bias + ph->vaddr + ph->size_in_file, n: ph->size_in_mem - ph->size_in_file); |
| 187 | } |
| 188 | |
| 189 | dInfo2<elf> << " ... done" ; |
| 190 | } |
| 191 | |
| 192 | static ptr_t elf_map_interpreter(const char *path, MMContext *mm) |
| 193 | { |
| 194 | auto interp_file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE); |
| 195 | if (interp_file.isErr()) |
| 196 | return 0; |
| 197 | |
| 198 | interp_file->ref(); |
| 199 | |
| 200 | elf_header_t elf; |
| 201 | if (!elf_read_and_verify_executable(file: interp_file.get(), header: &elf)) |
| 202 | { |
| 203 | mEmerg << "failed to verify ELF header for '" << dentry_name(dentry: interp_file->dentry) << "'" ; |
| 204 | interp_file->unref(); |
| 205 | return 0; |
| 206 | } |
| 207 | |
| 208 | ptr_t entry = 0; |
| 209 | |
| 210 | for (size_t i = 0; i < elf.ph.count; i++) |
| 211 | { |
| 212 | elf_program_hdr_t ph; |
| 213 | if (!elf_read_file(file: interp_file.get(), buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size)) |
| 214 | { |
| 215 | mEmerg << "failed to read program header " << i << " for '" << dentry_name(dentry: interp_file->dentry) << "'" ; |
| 216 | interp_file->unref(); |
| 217 | return 0; |
| 218 | } |
| 219 | |
| 220 | if (ph.header_type == ELF_PT_LOAD) |
| 221 | { |
| 222 | // interpreter is always loaded at vaddr 0 |
| 223 | elf_map_segment(ph: &ph, MOS_ELF_INTERPRETER_BASE_OFFSET, mm, file: interp_file.get()); |
| 224 | entry = elf.entry_point; |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | interp_file->unref(); |
| 229 | return MOS_ELF_INTERPRETER_BASE_OFFSET + entry; |
| 230 | } |
| 231 | |
| 232 | __nodiscard bool (Process *proc, FsBaseFile *file, elf_header_t , elf_startup_info_t *info) |
| 233 | { |
| 234 | bool ret = true; |
| 235 | |
| 236 | info->AddAuxvEntry(AT_PAGESZ, MOS_PAGE_SIZE); |
| 237 | info->AddAuxvEntry(AT_UID, val: 0); |
| 238 | info->AddAuxvEntry(AT_EUID, val: 0); |
| 239 | info->AddAuxvEntry(AT_GID, val: 0); |
| 240 | info->AddAuxvEntry(AT_EGID, val: 0); |
| 241 | info->AddAuxvEntry(AT_BASE, MOS_ELF_INTERPRETER_BASE_OFFSET); |
| 242 | |
| 243 | // !! after this point, we must make sure that we switch back to the previous address space before returning from this function !! |
| 244 | MMContext *const prev_mm = mm_switch_context(new_ctx: proc->mm); |
| 245 | |
| 246 | bool should_bias = header.object_type == ET_DYN; // only ET_DYN (shared libraries) needs randomization |
| 247 | ptrdiff_t map_bias = 0; // ELF segments are loaded at vaddr + load_bias |
| 248 | |
| 249 | bool has_interpreter = false; |
| 250 | ptr_t interp_entrypoint = 0; |
| 251 | ptr_t auxv_phdr_vaddr = false; // whether we need to add AT_PHDR, AT_PHENT, AT_PHNUM to the auxv vector |
| 252 | |
| 253 | for (size_t i = 0; i < header.ph.count; i++) |
| 254 | { |
| 255 | elf_program_hdr_t ph; |
| 256 | if (!elf_read_file(file, buf: &ph, offset: header.ph_offset + i * header.ph.entry_size, size: header.ph.entry_size)) |
| 257 | { |
| 258 | mEmerg << "failed to read program header " << i << " for '" << dentry_name(dentry: file->dentry) << "'" ; |
| 259 | const auto prev = mm_switch_context(new_ctx: prev_mm); |
| 260 | (void) prev; |
| 261 | return false; |
| 262 | } |
| 263 | |
| 264 | switch (ph.header_type) |
| 265 | { |
| 266 | case ELF_PT_NULL: break; // ignore |
| 267 | case ELF_PT_INTERP: |
| 268 | { |
| 269 | char interp_name[ph.size_in_file]; |
| 270 | if (!elf_read_file(file, buf: interp_name, offset: ph.data_offset, size: ph.size_in_file)) |
| 271 | { |
| 272 | mEmerg << "failed to read interpreter name for '" << dentry_name(dentry: file->dentry) << "'" ; |
| 273 | const auto prev = mm_switch_context(new_ctx: prev_mm); |
| 274 | (void) prev; |
| 275 | return false; |
| 276 | } |
| 277 | dInfo2<elf> << "elf interpreter: " << interp_name; |
| 278 | has_interpreter = true; |
| 279 | interp_entrypoint = elf_map_interpreter(path: interp_name, mm: proc->mm); |
| 280 | if (!interp_entrypoint) |
| 281 | { |
| 282 | dInfo2<elf> << "failed to map interpreter '" << interp_name << "'" ; |
| 283 | const auto prev = mm_switch_context(new_ctx: prev_mm); |
| 284 | (void) prev; |
| 285 | return false; |
| 286 | } |
| 287 | |
| 288 | if (should_bias) |
| 289 | map_bias = elf_determine_loadbias(elf: &header); |
| 290 | |
| 291 | break; |
| 292 | } |
| 293 | case ELF_PT_LOAD: |
| 294 | { |
| 295 | elf_map_segment(ph: &ph, map_bias, mm: proc->mm, file); |
| 296 | break; |
| 297 | } |
| 298 | case ELF_PT_PHDR: |
| 299 | { |
| 300 | auxv_phdr_vaddr = ph.vaddr; |
| 301 | break; |
| 302 | } |
| 303 | |
| 304 | case ELF_PT_NOTE: break; // intentionally ignored |
| 305 | case ELF_PT_DYNAMIC: break; // will be handled by the dynamic linker |
| 306 | case ELF_PT_TLS: break; // will be handled by the dynamic linker or libc |
| 307 | default: |
| 308 | { |
| 309 | if (MOS_IN_RANGE(ph.header_type, ELF_PT_OS_LOW, ELF_PT_OS_HIGH)) |
| 310 | dInfo2<elf> << "ignoring OS-specific program header type 0x" << ph.header_type; |
| 311 | else if (MOS_IN_RANGE(ph.header_type, ELF_PT_PROCESSOR_LO, ELF_PT_PROCESSOR_HI)) |
| 312 | dInfo2<elf> << "ignoring processor-specific program header type 0x" << ph.header_type; |
| 313 | else |
| 314 | mWarn << "unknown program header type 0x" << ph.header_type; |
| 315 | break; |
| 316 | } |
| 317 | }; |
| 318 | } |
| 319 | |
| 320 | if (auxv_phdr_vaddr) |
| 321 | { |
| 322 | info->AddAuxvEntry(AT_PHDR, val: map_bias + auxv_phdr_vaddr); |
| 323 | info->AddAuxvEntry(AT_PHENT, val: header.ph.entry_size); |
| 324 | info->AddAuxvEntry(AT_PHNUM, val: header.ph.count); |
| 325 | } |
| 326 | |
| 327 | info->AddAuxvEntry(AT_ENTRY, val: map_bias + header.entry_point); // the entry point of the executable, not the interpreter |
| 328 | |
| 329 | ptr_t user_argv, user_envp; |
| 330 | const auto main_thread = proc->main_thread; |
| 331 | elf_setup_main_thread(thread: main_thread, info, out_pargv: &user_argv, out_penvp: &user_envp); |
| 332 | platform_context_setup_main_thread( // |
| 333 | thread: main_thread, // |
| 334 | entry: has_interpreter ? interp_entrypoint : header.entry_point, // |
| 335 | sp: main_thread->u_stack.head, // |
| 336 | argc: info->argv.size(), // |
| 337 | argv: user_argv, // |
| 338 | envp: user_envp // |
| 339 | ); |
| 340 | |
| 341 | MMContext *prev = mm_switch_context(new_ctx: prev_mm); |
| 342 | MOS_UNUSED(prev); |
| 343 | |
| 344 | return ret; |
| 345 | } |
| 346 | |
| 347 | bool elf_read_and_verify_executable(FsBaseFile *file, elf_header_t *) |
| 348 | { |
| 349 | if (!elf_read_file(file, buf: header, offset: 0, size: sizeof(elf_header_t))) |
| 350 | return false; |
| 351 | |
| 352 | const bool valid = elf_verify_header(header); |
| 353 | if (!valid) |
| 354 | return false; |
| 355 | |
| 356 | if (header->object_type != ET_EXEC && header->object_type != ET_DYN) |
| 357 | return false; |
| 358 | |
| 359 | return true; |
| 360 | } |
| 361 | |
| 362 | [[nodiscard]] static bool elf_fill_process(Process *proc, FsBaseFile *file, mos::string_view path, const mos::vector<mos::string> &argv, |
| 363 | const mos::vector<mos::string> &envp) |
| 364 | { |
| 365 | bool ret = false; |
| 366 | |
| 367 | file->ref(); |
| 368 | |
| 369 | elf_header_t elf; |
| 370 | if (!elf_read_and_verify_executable(file, header: &elf)) |
| 371 | { |
| 372 | mEmerg << "failed to verify ELF header for '" << dentry_name(dentry: file->dentry) << "'" ; |
| 373 | file->unref(); // close the file, we should have the file's refcount == 0 here |
| 374 | return ret; |
| 375 | } |
| 376 | |
| 377 | elf_startup_info_t info{ .invocation = path, .argv = argv, .envp = envp }; |
| 378 | ret = elf_do_fill_process(proc, file, header: elf, info: &info); |
| 379 | |
| 380 | file->unref(); // close the file, we should have the file's refcount == 0 here |
| 381 | return ret; |
| 382 | } |
| 383 | |
| 384 | Process *elf_create_process(mos::string_view path, Process *parent, const mos::vector<mos::string> &argv, const mos::vector<mos::string> &envp, const stdio_t *ios) |
| 385 | { |
| 386 | auto file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE); |
| 387 | if (file.isErr()) |
| 388 | { |
| 389 | mos_warn("failed to open '%s'" , path.data()); |
| 390 | return NULL; |
| 391 | } |
| 392 | file->ref(); |
| 393 | |
| 394 | auto proc = process_new(parelagsnt: parent, name: file->dentry->name, ios); |
| 395 | if (!proc) |
| 396 | { |
| 397 | mos_warn("failed to create process for '%s'" , dentry_name(file->dentry).c_str()); |
| 398 | file->unref(); |
| 399 | return proc; |
| 400 | } |
| 401 | |
| 402 | const bool filled = elf_fill_process(proc, file: file.get(), path, argv, envp); |
| 403 | thread_complete_init(thread: proc->main_thread); |
| 404 | scheduler_add_thread(thread: proc->main_thread); |
| 405 | |
| 406 | if (!filled) |
| 407 | { |
| 408 | // TODO how do we make sure that the process is cleaned up properly? |
| 409 | process_exit(proc: std::move(t&: proc), exit_code: 0, SIGKILL); |
| 410 | proc = NULL; |
| 411 | } |
| 412 | |
| 413 | file->unref(); // close the file, we should have the file's refcount == 0 here |
| 414 | return proc; |
| 415 | } |
| 416 | |