1 | // SPDX-License-Identifier: GPL-3.0-or-later |
2 | |
3 | #include "mos/tasks/elf.hpp" |
4 | |
5 | #include "mos/filesystem/vfs.hpp" |
6 | #include "mos/io/io.hpp" |
7 | #include "mos/mm/mm.hpp" |
8 | #include "mos/mm/mmap.hpp" |
9 | #include "mos/platform/platform.hpp" |
10 | #include "mos/syslog/printk.hpp" |
11 | #include "mos/tasks/process.hpp" |
12 | #include "mos/tasks/schedule.hpp" |
13 | #include "mos/tasks/task_types.hpp" |
14 | #include "mos/tasks/thread.hpp" |
15 | |
16 | #include <mos/types.hpp> |
17 | #include <mos_stdlib.hpp> |
18 | #include <mos_string.hpp> |
19 | |
20 | MOS_STATIC_ASSERT(sizeof(elf_header_t) == 0x40, "elf_header has wrong size" ); |
21 | MOS_STATIC_ASSERT(sizeof(elf_program_hdr_t) == 0x38, "elf_program_header has wrong size" ); |
22 | |
23 | static void add_auxv_entry(auxv_vec_t *var, unsigned long type, unsigned long val) |
24 | { |
25 | MOS_ASSERT_X(var->count < AUXV_VEC_SIZE, "auxv vector overflow, increase AUXV_VEC_SIZE" ); |
26 | |
27 | var->vector[var->count].a_type = type; |
28 | var->vector[var->count].a_un.a_val = val; |
29 | var->count++; |
30 | } |
31 | |
32 | static bool (const elf_header_t *) |
33 | { |
34 | if (header->identity.magic[0] != ELFMAG0) |
35 | return false; |
36 | |
37 | if (strncmp(str1: &header->identity.magic[1], str2: "ELF" , n: 3) != 0) |
38 | return false; |
39 | |
40 | if (header->identity.bits != ELFCLASS64) |
41 | return false; |
42 | |
43 | if (header->identity.endianness != ELF_ENDIANNESS_MOS_DEFAULT) |
44 | return false; |
45 | |
46 | if (header->identity.osabi != 0) |
47 | return false; |
48 | |
49 | if (header->identity.version != EV_CURRENT) |
50 | return false; |
51 | |
52 | if (header->machine_type != MOS_ELF_PLATFORM) |
53 | return false; |
54 | |
55 | return true; |
56 | } |
57 | |
58 | [[nodiscard]] static bool elf_read_file(file_t *file, void *buf, off_t offset, size_t size) |
59 | { |
60 | const size_t read = io_pread(io: &file->io, buf, count: size, offset); |
61 | return read == size; |
62 | } |
63 | |
64 | static ptr_t (elf_header_t *elf) |
65 | { |
66 | MOS_UNUSED(elf); |
67 | return 0x4000000; // TODO: randomize |
68 | } |
69 | |
/**
 * Typical Stack Layout:
 *
 * (low address)
 *      |-> uintn argc
 *      |-> ptr_t argv[]
 *      |   |-> NULL
 *      |-> ptr_t envp[]
 *      |   |-> NULL
 *      |-> AuxV
 *      |   |-> AT_...
 *      |   |-> AT_NULL
 *      |-> argv strings, each NUL-terminated
 *      |-> environment strings, each NUL-terminated
 *      |-> invocation path string, NUL-terminated (referenced by AT_EXECFN)
 *      |-> uintn zero
 * (high address, end of stack)
 */
87 | |
88 | static void elf_setup_main_thread(Thread *thread, elf_startup_info_t *const info, ptr_t *const out_pargv, ptr_t *const out_penvp) |
89 | { |
90 | pr_dinfo2(elf, "cpu %d: setting up a new main thread %pt of process %pp" , current_cpu->id, thread, thread->owner); |
91 | |
92 | MOS_ASSERT_X(thread->u_stack.head == thread->u_stack.top, "thread %pt's user stack is not empty" , thread); |
93 | stack_push_val(&thread->u_stack, (uintn) 0); |
94 | |
95 | const void *stack_envp[info->envc + 1]; // +1 for the null terminator |
96 | const void *stack_argv[info->argc + 1]; // +1 for the null terminator |
97 | |
98 | // calculate the size of entire stack usage |
99 | size_t stack_size = 0; |
100 | stack_size += sizeof(uintn); // the topmost zero |
101 | stack_size += strlen(str: info->invocation) + 1; // +1 for the null terminator |
102 | |
103 | for (int i = 0; i < info->envc; i++) |
104 | stack_size += strlen(str: info->envp[i]) + 1; // +1 for the null terminator |
105 | |
106 | for (int i = 0; i < info->argc; i++) |
107 | stack_size += strlen(str: info->argv[i]) + 1; // +1 for the null terminator |
108 | |
109 | stack_size += sizeof(Elf64_auxv_t) * (info->auxv.count + 2); // AT_EXECFN and AT_NULL |
110 | stack_size += sizeof(stack_envp); // envp |
111 | stack_size += sizeof(stack_argv); // argv |
112 | stack_size += sizeof(uintn); // argc |
113 | |
114 | // align to 16 bytes |
115 | const size_t aligned_stack_size = ALIGN_UP(stack_size, 16); |
116 | thread->u_stack.head = thread->u_stack.top - (aligned_stack_size - stack_size); // so that the stack can be aligned to 16 bytes |
117 | |
118 | stack_push_val(&thread->u_stack, (uintn) 0); |
119 | |
120 | void *invocation_ptr = stack_push(stack: &thread->u_stack, data: info->invocation, size: strlen(str: info->invocation) + 1); // +1 for the null terminator |
121 | |
122 | add_auxv_entry(var: &info->auxv, AT_EXECFN, val: (ptr_t) invocation_ptr); |
123 | add_auxv_entry(var: &info->auxv, AT_NULL, val: 0); |
124 | |
125 | // ! copy the environment to the stack in reverse order ! |
126 | if (info->envc == 0) |
127 | goto no_envp; |
128 | |
129 | for (int i = info->envc - 1; i >= 0; i--) |
130 | { |
131 | const size_t len = strlen(str: info->envp[i]) + 1; // +1 for the null terminator |
132 | stack_envp[i] = stack_push(stack: &thread->u_stack, data: info->envp[i], size: len); |
133 | } |
134 | |
135 | no_envp: |
136 | stack_envp[info->envc] = NULL; |
137 | |
138 | // ! copy the argv to the stack in reverse order ! |
139 | if (info->argc == 0) |
140 | goto no_argv; |
141 | |
142 | for (int i = info->argc - 1; i >= 0; i--) |
143 | { |
144 | const size_t len = strlen(str: info->argv[i]) + 1; // +1 for the null terminator |
145 | stack_argv[i] = stack_push(stack: &thread->u_stack, data: info->argv[i], size: len); |
146 | } |
147 | |
148 | no_argv: |
149 | stack_argv[info->argc] = NULL; |
150 | |
151 | stack_push(stack: &thread->u_stack, data: info->auxv.vector, size: sizeof(Elf64_auxv_t) * info->auxv.count); // auxv |
152 | *out_penvp = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_envp, size: sizeof(char *) * (info->envc + 1)); // envp |
153 | *out_pargv = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_argv, size: sizeof(char *) * (info->argc + 1)); // argv |
154 | stack_push_val(&thread->u_stack, (uintn) info->argc); // argc |
155 | MOS_ASSERT(thread->u_stack.head % 16 == 0); |
156 | } |
157 | |
158 | static void elf_map_segment(const elf_program_hdr_t *const ph, ptr_t map_bias, MMContext *mm, file_t *file) |
159 | { |
160 | MOS_ASSERT(ph->header_type == ELF_PT_LOAD); |
161 | pr_dinfo2(elf, "program header %c%c%c, type '%d' at " PTR_FMT, // |
162 | ph->p_flags & ELF_PF_R ? 'r' : '-', // |
163 | ph->p_flags & ELF_PF_W ? 'w' : '-', // |
164 | ph->p_flags & ELF_PF_X ? 'x' : '-', // |
165 | ph->header_type, // |
166 | ph->vaddr // |
167 | ); |
168 | |
169 | MOS_ASSERT(ph->data_offset % MOS_PAGE_SIZE == ph->vaddr % MOS_PAGE_SIZE); // offset ≡ vaddr (mod page size) |
170 | MOS_ASSERT_X(ph->size_in_file <= ph->size_in_mem, "invalid ELF: size in file is larger than size in memory" ); |
171 | |
172 | const vm_flags flags = [pflags = ph->p_flags]() |
173 | { |
174 | vm_flags f = VM_USER; |
175 | if (pflags & ELF_PF_R) |
176 | f |= VM_READ; |
177 | if (pflags & ELF_PF_W) |
178 | f |= VM_WRITE; |
179 | if (pflags & ELF_PF_X) |
180 | f |= VM_EXEC; |
181 | return f; |
182 | }(); |
183 | |
184 | const ptr_t aligned_vaddr = ALIGN_DOWN_TO_PAGE(ph->vaddr); |
185 | const size_t npages = (ALIGN_UP_TO_PAGE(ph->vaddr + ph->size_in_mem) - aligned_vaddr) / MOS_PAGE_SIZE; |
186 | const size_t aligned_size = ALIGN_DOWN_TO_PAGE(ph->data_offset); |
187 | |
188 | const ptr_t map_start = map_bias + aligned_vaddr; |
189 | pr_dinfo2(elf, " mapping %zu pages at " PTR_FMT " (bias at " PTR_FMT ") from offset %zu..." , npages, map_start, map_bias, aligned_size); |
190 | |
191 | const ptr_t vaddr = mmap_file(ctx: mm, hint_addr: map_start, flags: mmap_flags_t(MMAP_PRIVATE | MMAP_EXACT), vm_flags: flags, n_pages: npages, io: &file->io, offset: aligned_size); |
192 | MOS_ASSERT_X(vaddr == map_start, "failed to map ELF segment at " PTR_FMT, aligned_vaddr); |
193 | |
194 | if (ph->size_in_file < ph->size_in_mem) |
195 | { |
196 | pr_dinfo2(elf, " ... and zeroing %zu bytes at " PTR_FMT, ph->size_in_mem - ph->size_in_file, map_bias + ph->vaddr + ph->size_in_file); |
197 | memzero(s: (char *) map_bias + ph->vaddr + ph->size_in_file, n: ph->size_in_mem - ph->size_in_file); |
198 | } |
199 | |
200 | pr_dinfo2(elf, " ... done" ); |
201 | } |
202 | |
203 | static ptr_t elf_map_interpreter(const char *path, MMContext *mm) |
204 | { |
205 | auto interp_file = vfs_openat(AT_FDCWD, path, flags: open_flags(OPEN_READ | OPEN_EXECUTE)); |
206 | if (interp_file.isErr()) |
207 | return 0; |
208 | |
209 | io_ref(io: &interp_file->io); |
210 | |
211 | elf_header_t elf; |
212 | if (!elf_read_and_verify_executable(file: interp_file.get(), header: &elf)) |
213 | { |
214 | pr_emerg("failed to verify ELF header for '%s'" , dentry_name(interp_file->dentry).c_str()); |
215 | io_unref(io: &interp_file->io); |
216 | return 0; |
217 | } |
218 | |
219 | ptr_t entry = 0; |
220 | |
221 | for (size_t i = 0; i < elf.ph.count; i++) |
222 | { |
223 | elf_program_hdr_t ph; |
224 | if (!elf_read_file(file: interp_file.get(), buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size)) |
225 | { |
226 | pr_emerg("failed to read program header %zu for '%s'" , i, dentry_name(interp_file->dentry).c_str()); |
227 | io_unref(io: &interp_file->io); |
228 | return 0; |
229 | } |
230 | |
231 | if (ph.header_type == ELF_PT_LOAD) |
232 | { |
233 | // interpreter is always loaded at vaddr 0 |
234 | elf_map_segment(ph: &ph, MOS_ELF_INTERPRETER_BASE_OFFSET, mm, file: interp_file.get()); |
235 | entry = elf.entry_point; |
236 | } |
237 | } |
238 | |
239 | io_unref(io: &interp_file->io); |
240 | return MOS_ELF_INTERPRETER_BASE_OFFSET + entry; |
241 | } |
242 | |
243 | __nodiscard bool (Process *proc, file_t *file, elf_header_t elf, elf_startup_info_t *info) |
244 | { |
245 | bool ret = true; |
246 | |
247 | add_auxv_entry(var: &info->auxv, AT_PAGESZ, MOS_PAGE_SIZE); |
248 | add_auxv_entry(var: &info->auxv, AT_UID, val: 0); |
249 | add_auxv_entry(var: &info->auxv, AT_EUID, val: 0); |
250 | add_auxv_entry(var: &info->auxv, AT_GID, val: 0); |
251 | add_auxv_entry(var: &info->auxv, AT_EGID, val: 0); |
252 | add_auxv_entry(var: &info->auxv, AT_BASE, MOS_ELF_INTERPRETER_BASE_OFFSET); |
253 | |
254 | // !! after this point, we must make sure that we switch back to the previous address space before returning from this function !! |
255 | MMContext *const prev_mm = mm_switch_context(new_ctx: proc->mm); |
256 | |
257 | bool should_bias = elf.object_type == ET_DYN; // only ET_DYN (shared libraries) needs randomization |
258 | ptrdiff_t map_bias = 0; // ELF segments are loaded at vaddr + load_bias |
259 | |
260 | bool has_interpreter = false; |
261 | ptr_t interp_entrypoint = 0; |
262 | ptr_t auxv_phdr_vaddr = false; // whether we need to add AT_PHDR, AT_PHENT, AT_PHNUM to the auxv vector |
263 | |
264 | for (size_t i = 0; i < elf.ph.count; i++) |
265 | { |
266 | elf_program_hdr_t ph; |
267 | if (!elf_read_file(file, buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size)) |
268 | { |
269 | pr_emerg("failed to read program header %zu for '%s'" , i, dentry_name(file->dentry).c_str()); |
270 | const auto prev = mm_switch_context(new_ctx: prev_mm); |
271 | (void) prev; |
272 | return false; |
273 | } |
274 | |
275 | switch (ph.header_type) |
276 | { |
277 | case ELF_PT_NULL: break; // ignore |
278 | case ELF_PT_INTERP: |
279 | { |
280 | char interp_name[ph.size_in_file]; |
281 | if (!elf_read_file(file, buf: interp_name, offset: ph.data_offset, size: ph.size_in_file)) |
282 | { |
283 | pr_emerg("failed to read interpreter name for '%s'" , dentry_name(file->dentry).c_str()); |
284 | const auto prev = mm_switch_context(new_ctx: prev_mm); |
285 | (void) prev; |
286 | return false; |
287 | } |
288 | pr_dinfo2(elf, "elf interpreter: %s" , interp_name); |
289 | has_interpreter = true; |
290 | interp_entrypoint = elf_map_interpreter(path: interp_name, mm: proc->mm); |
291 | if (!interp_entrypoint) |
292 | { |
293 | pr_dinfo2(elf, "failed to map interpreter '%s'" , interp_name); |
294 | const auto prev = mm_switch_context(new_ctx: prev_mm); |
295 | (void) prev; |
296 | return false; |
297 | } |
298 | |
299 | if (should_bias) |
300 | map_bias = elf_determine_loadbias(elf: &elf); |
301 | |
302 | break; |
303 | } |
304 | case ELF_PT_LOAD: |
305 | { |
306 | elf_map_segment(ph: &ph, map_bias, mm: proc->mm, file); |
307 | break; |
308 | } |
309 | case ELF_PT_PHDR: |
310 | { |
311 | auxv_phdr_vaddr = ph.vaddr; |
312 | break; |
313 | } |
314 | |
315 | case ELF_PT_NOTE: break; // intentionally ignored |
316 | case ELF_PT_DYNAMIC: break; // will be handled by the dynamic linker |
317 | case ELF_PT_TLS: break; // will be handled by the dynamic linker or libc |
318 | default: |
319 | { |
320 | if (MOS_IN_RANGE(ph.header_type, ELF_PT_OS_LOW, ELF_PT_OS_HIGH)) |
321 | pr_dinfo2(elf, "ignoring OS-specific program header type 0x%x" , ph.header_type); |
322 | else if (MOS_IN_RANGE(ph.header_type, ELF_PT_PROCESSOR_LO, ELF_PT_PROCESSOR_HI)) |
323 | pr_dinfo2(elf, "ignoring processor-specific program header type 0x%x" , ph.header_type); |
324 | else |
325 | pr_warn("unknown program header type 0x%x" , ph.header_type); |
326 | break; |
327 | } |
328 | }; |
329 | } |
330 | |
331 | if (auxv_phdr_vaddr) |
332 | { |
333 | add_auxv_entry(var: &info->auxv, AT_PHDR, val: map_bias + auxv_phdr_vaddr); |
334 | add_auxv_entry(var: &info->auxv, AT_PHENT, val: elf.ph.entry_size); |
335 | add_auxv_entry(var: &info->auxv, AT_PHNUM, val: elf.ph.count); |
336 | } |
337 | |
338 | add_auxv_entry(var: &info->auxv, AT_ENTRY, val: map_bias + elf.entry_point); // the entry point of the executable, not the interpreter |
339 | |
340 | ptr_t user_argv, user_envp; |
341 | const auto main_thread = proc->main_thread; |
342 | elf_setup_main_thread(thread: main_thread, info, out_pargv: &user_argv, out_penvp: &user_envp); |
343 | platform_context_setup_main_thread(thread: main_thread, entry: has_interpreter ? interp_entrypoint : elf.entry_point, sp: main_thread->u_stack.head, argc: info->argc, argv: user_argv, envp: user_envp); |
344 | |
345 | MMContext *prev = mm_switch_context(new_ctx: prev_mm); |
346 | MOS_UNUSED(prev); |
347 | |
348 | return ret; |
349 | } |
350 | |
351 | bool elf_read_and_verify_executable(file_t *file, elf_header_t *) |
352 | { |
353 | if (!elf_read_file(file, buf: header, offset: 0, size: sizeof(elf_header_t))) |
354 | return false; |
355 | |
356 | const bool valid = elf_verify_header(header); |
357 | if (!valid) |
358 | return false; |
359 | |
360 | if (header->object_type != ET_EXEC && header->object_type != ET_DYN) |
361 | return false; |
362 | |
363 | return true; |
364 | } |
365 | |
366 | bool elf_fill_process(Process *proc, file_t *file, const char *path, const char *const argv[], const char *const envp[]) |
367 | { |
368 | bool ret = false; |
369 | |
370 | io_ref(io: &file->io); |
371 | |
372 | elf_header_t elf; |
373 | if (!elf_read_and_verify_executable(file, header: &elf)) |
374 | { |
375 | pr_emerg("failed to verify ELF header for '%s'" , dentry_name(file->dentry).c_str()); |
376 | io_unref(io: &file->io); // close the file, we should have the file's refcount == 0 here |
377 | return ret; |
378 | } |
379 | |
380 | int argc = 0; |
381 | while (argv && argv[argc] != NULL) |
382 | argc++; |
383 | |
384 | int envc = 0; |
385 | while (envp && envp[envc] != NULL) |
386 | envc++; |
387 | |
388 | elf_startup_info_t info = { |
389 | .invocation = strdup(src: path), |
390 | .auxv = {}, |
391 | .argc = argc, |
392 | .argv = kcalloc<const char *>(n_members: argc + 1), |
393 | .envc = envc, |
394 | .envp = kcalloc<const char *>(n_members: envc + 1), |
395 | }; |
396 | |
397 | for (int i = 0; i < argc; i++) |
398 | info.argv[i] = strdup(src: argv[i]); // copy the strings to kernel space, since we are switching to a new address space |
399 | info.argv[argc] = NULL; |
400 | |
401 | for (int i = 0; i < envc; i++) |
402 | info.envp[i] = strdup(src: envp[i]); // copy the strings to kernel space, since we are switching to a new address space |
403 | info.envp[envc] = NULL; |
404 | |
405 | ret = elf_do_fill_process(proc, file, elf, info: &info); |
406 | |
407 | if (info.invocation) |
408 | kfree(ptr: info.invocation); |
409 | for (int i = 0; i < argc; i++) |
410 | kfree(ptr: info.argv[i]); |
411 | for (int i = 0; i < envc; i++) |
412 | kfree(ptr: info.envp[i]); |
413 | kfree(ptr: info.argv); |
414 | kfree(ptr: info.envp); |
415 | |
416 | io_unref(io: &file->io); // close the file, we should have the file's refcount == 0 here |
417 | return ret; |
418 | } |
419 | |
420 | Process *elf_create_process(const char *path, Process *parent, const char *const argv[], const char *const envp[], const stdio_t *ios) |
421 | { |
422 | auto file = vfs_openat(AT_FDCWD, path, flags: open_flags(OPEN_READ | OPEN_EXECUTE)); |
423 | if (file.isErr()) |
424 | { |
425 | mos_warn("failed to open '%s'" , path); |
426 | return NULL; |
427 | } |
428 | io_ref(io: &file->io); |
429 | |
430 | auto proc = process_new(parent, name: file->dentry->name, ios); |
431 | if (!proc) |
432 | { |
433 | mos_warn("failed to create process for '%s'" , dentry_name(file->dentry).c_str()); |
434 | io_unref(io: &file->io); |
435 | return proc; |
436 | } |
437 | |
438 | const bool filled = elf_fill_process(proc, file: file.get(), path, argv, envp); |
439 | thread_complete_init(thread: proc->main_thread); |
440 | scheduler_add_thread(thread: proc->main_thread); |
441 | |
442 | if (!filled) |
443 | { |
444 | // TODO how do we make sure that the process is cleaned up properly? |
445 | process_exit(proc: std::move(t&: proc), exit_code: 0, SIGKILL); |
446 | proc = NULL; |
447 | } |
448 | |
449 | io_unref(io: &file->io); // close the file, we should have the file's refcount == 0 here |
450 | return proc; |
451 | } |
452 | |