1// SPDX-License-Identifier: GPL-3.0-or-later
2
3#include "mos/tasks/elf.h"
4
5#include "mos/filesystem/vfs.h"
6#include "mos/mm/mmap.h"
7#include "mos/platform/platform.h"
8#include "mos/syslog/printk.h"
9#include "mos/tasks/process.h"
10#include "mos/tasks/schedule.h"
11#include "mos/tasks/task_types.h"
12#include "mos/tasks/thread.h"
13
14#include <mos/types.h>
15#include <mos_stdlib.h>
16#include <mos_string.h>
17
// The ELF header is read from the file directly into an elf_header_t (and program headers into an
// elf_program_hdr_t), so both structures are pinned to fixed sizes at compile time.
MOS_STATIC_ASSERT(sizeof(elf_header_t) == 0x40, "elf_header has wrong size");
MOS_STATIC_ASSERT(sizeof(elf_program_hdr_t) == 0x38, "elf_program_header has wrong size");
20
21static void add_auxv_entry(auxv_vec_t *var, unsigned long type, unsigned long val)
22{
23 MOS_ASSERT_X(var->count < AUXV_VEC_SIZE, "auxv vector overflow, increase AUXV_VEC_SIZE");
24
25 var->vector[var->count].a_type = type;
26 var->vector[var->count].a_un.a_val = val;
27 var->count++;
28}
29
30static bool elf_verify_header(const elf_header_t *header)
31{
32 if (header->identity.magic[0] != ELFMAG0)
33 return false;
34
35 if (strncmp(str1: &header->identity.magic[1], str2: "ELF", n: 3) != 0)
36 return false;
37
38 if (header->identity.bits != ELFCLASS64)
39 return false;
40
41 if (header->identity.endianness != ELF_ENDIANNESS_MOS_DEFAULT)
42 return false;
43
44 if (header->identity.osabi != 0)
45 return false;
46
47 if (header->identity.version != EV_CURRENT)
48 return false;
49
50 if (header->machine_type != MOS_ELF_PLATFORM)
51 return false;
52
53 return true;
54}
55
56[[nodiscard]] static bool elf_read_file(file_t *file, void *buf, off_t offset, size_t size)
57{
58 const size_t read = io_pread(io: &file->io, buf, count: size, offset);
59 return read == size;
60}
61
62static ptr_t elf_determine_loadbias(elf_header_t *elf)
63{
64 MOS_UNUSED(elf);
65 return 0x4000000; // TODO: randomize
66}
67
/**
 * Typical Stack Layout (as built by elf_setup_main_thread):
 *
 * (low address, initial user stack pointer, 16-byte aligned)
 *      |-> uintn argc
 *      |-> ptr_t argv[]
 *      |   |-> NULL
 *      |-> ptr_t envp[]
 *      |   |-> NULL
 *      |-> AuxV
 *      |   |-> AT_...
 *      |   |-> AT_EXECFN
 *      |   |-> AT_NULL
 *      |-> argv strings, NUL-terminated
 *      |-> environment strings, NUL-terminated
 *      |-> invocation string (what AT_EXECFN points to), NUL-terminated
 *      |-> uintn zero
 *      |-> alignment padding, if any
 * (high address, end of stack)
 */
85
86static void elf_setup_main_thread(thread_t *thread, elf_startup_info_t *const info, ptr_t *const out_pargv, ptr_t *const out_penvp)
87{
88 pr_dinfo2(elf, "cpu %d: setting up a new main thread %pt of process %pp", current_cpu->id, (void *) thread, (void *) thread->owner);
89
90 MOS_ASSERT_X(thread->u_stack.head == thread->u_stack.top, "thread %pt's user stack is not empty", (void *) thread);
91 stack_push_val(&thread->u_stack, (uintn) 0);
92
93 const void *stack_envp[info->envc + 1]; // +1 for the null terminator
94 const void *stack_argv[info->argc + 1]; // +1 for the null terminator
95
96 // calculate the size of entire stack usage
97 size_t stack_size = 0;
98 stack_size += sizeof(uintn); // the topmost zero
99 stack_size += strlen(str: info->invocation) + 1; // +1 for the null terminator
100
101 for (int i = 0; i < info->envc; i++)
102 stack_size += strlen(str: info->envp[i]) + 1; // +1 for the null terminator
103
104 for (int i = 0; i < info->argc; i++)
105 stack_size += strlen(str: info->argv[i]) + 1; // +1 for the null terminator
106
107 stack_size += sizeof(Elf64_auxv_t) * (info->auxv.count + 2); // AT_EXECFN and AT_NULL
108 stack_size += sizeof(stack_envp); // envp
109 stack_size += sizeof(stack_argv); // argv
110 stack_size += sizeof(uintn); // argc
111
112 // align to 16 bytes
113 const size_t aligned_stack_size = ALIGN_UP(stack_size, 16);
114 thread->u_stack.head = thread->u_stack.top - (aligned_stack_size - stack_size); // so that the stack can be aligned to 16 bytes
115
116 stack_push_val(&thread->u_stack, (uintn) 0);
117
118 void *invocation_ptr = stack_push(stack: &thread->u_stack, data: info->invocation, size: strlen(str: info->invocation) + 1); // +1 for the null terminator
119
120 add_auxv_entry(var: &info->auxv, AT_EXECFN, val: (ptr_t) invocation_ptr);
121 add_auxv_entry(var: &info->auxv, AT_NULL, val: 0);
122
123 // ! copy the environment to the stack in reverse order !
124 if (info->envc == 0)
125 goto no_envp;
126
127 for (int i = info->envc - 1; i >= 0; i--)
128 {
129 const size_t len = strlen(str: info->envp[i]) + 1; // +1 for the null terminator
130 stack_envp[i] = stack_push(stack: &thread->u_stack, data: info->envp[i], size: len);
131 }
132
133no_envp:
134 stack_envp[info->envc] = NULL;
135
136 // ! copy the argv to the stack in reverse order !
137 if (info->argc == 0)
138 goto no_argv;
139
140 for (int i = info->argc - 1; i >= 0; i--)
141 {
142 const size_t len = strlen(str: info->argv[i]) + 1; // +1 for the null terminator
143 stack_argv[i] = stack_push(stack: &thread->u_stack, data: info->argv[i], size: len);
144 }
145
146no_argv:
147 stack_argv[info->argc] = NULL;
148
149 stack_push(stack: &thread->u_stack, data: info->auxv.vector, size: sizeof(Elf64_auxv_t) * info->auxv.count); // auxv
150 *out_penvp = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_envp, size: sizeof(char *) * (info->envc + 1)); // envp
151 *out_pargv = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_argv, size: sizeof(char *) * (info->argc + 1)); // argv
152 stack_push_val(&thread->u_stack, (uintn) info->argc); // argc
153 MOS_ASSERT(thread->u_stack.head % 16 == 0);
154}
155
156static void elf_map_segment(const elf_program_hdr_t *const ph, ptr_t map_bias, mm_context_t *mm, file_t *file)
157{
158 MOS_ASSERT(ph->header_type == ELF_PT_LOAD);
159 pr_dinfo2(elf, "program header %c%c%c, type '%d' at " PTR_FMT, //
160 ph->p_flags & ELF_PF_R ? 'r' : '-', //
161 ph->p_flags & ELF_PF_W ? 'w' : '-', //
162 ph->p_flags & ELF_PF_X ? 'x' : '-', //
163 ph->header_type, //
164 ph->vaddr //
165 );
166
167 MOS_ASSERT(ph->data_offset % MOS_PAGE_SIZE == ph->vaddr % MOS_PAGE_SIZE); // offset ≡ vaddr (mod page size)
168 MOS_ASSERT_X(ph->size_in_file <= ph->size_in_mem, "invalid ELF: size in file is larger than size in memory");
169
170 const vm_flags flags = VM_USER | (ph->p_flags & ELF_PF_R ? VM_READ : 0) | (ph->p_flags & ELF_PF_W ? VM_WRITE : 0) | (ph->p_flags & ELF_PF_X ? VM_EXEC : 0);
171 const ptr_t aligned_vaddr = ALIGN_DOWN_TO_PAGE(ph->vaddr);
172 const size_t npages = (ALIGN_UP_TO_PAGE(ph->vaddr + ph->size_in_mem) - aligned_vaddr) / MOS_PAGE_SIZE;
173 const size_t aligned_size = ALIGN_DOWN_TO_PAGE(ph->data_offset);
174
175 const ptr_t map_start = map_bias + aligned_vaddr;
176 pr_dinfo2(elf, " mapping %zu pages at " PTR_FMT " (bias at " PTR_FMT ") from offset %zu...", npages, map_start, map_bias, aligned_size);
177
178 const ptr_t vaddr = mmap_file(ctx: mm, hint_addr: map_start, flags: MMAP_PRIVATE | MMAP_EXACT, vm_flags: flags, n_pages: npages, io: &file->io, offset: aligned_size);
179 MOS_ASSERT_X(vaddr == map_start, "failed to map ELF segment at " PTR_FMT, aligned_vaddr);
180
181 if (ph->size_in_file < ph->size_in_mem)
182 {
183 pr_dinfo2(elf, " ... and zeroing %zu bytes at " PTR_FMT, ph->size_in_mem - ph->size_in_file, map_bias + ph->vaddr + ph->size_in_file);
184 memzero(s: (char *) map_bias + ph->vaddr + ph->size_in_file, n: ph->size_in_mem - ph->size_in_file);
185 }
186
187 pr_dinfo2(elf, " ... done");
188}
189
190static ptr_t elf_map_interpreter(const char *path, mm_context_t *mm)
191{
192 file_t *const interp_file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE);
193 if (IS_ERR(ptr: interp_file))
194 return 0;
195
196 io_ref(io: &interp_file->io);
197
198 elf_header_t elf;
199 if (!elf_read_and_verify_executable(file: interp_file, header: &elf))
200 {
201 pr_emerg("failed to verify ELF header for '%s'", dentry_name(interp_file->dentry));
202 io_unref(io: &interp_file->io);
203 return 0;
204 }
205
206 ptr_t entry = 0;
207
208 for (size_t i = 0; i < elf.ph.count; i++)
209 {
210 elf_program_hdr_t ph;
211 if (!elf_read_file(file: interp_file, buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size))
212 {
213 pr_emerg("failed to read program header %zu for '%s'", i, dentry_name(interp_file->dentry));
214 io_unref(io: &interp_file->io);
215 return 0;
216 }
217
218 if (ph.header_type == ELF_PT_LOAD)
219 {
220 // interpreter is always loaded at vaddr 0
221 elf_map_segment(ph: &ph, MOS_ELF_INTERPRETER_BASE_OFFSET, mm, file: interp_file);
222 entry = elf.entry_point;
223 }
224 }
225
226 io_unref(io: &interp_file->io);
227 return MOS_ELF_INTERPRETER_BASE_OFFSET + entry;
228}
229
230__nodiscard bool elf_do_fill_process(process_t *proc, file_t *file, elf_header_t elf, elf_startup_info_t *info)
231{
232 bool ret = true;
233
234 add_auxv_entry(var: &info->auxv, AT_PAGESZ, MOS_PAGE_SIZE);
235 add_auxv_entry(var: &info->auxv, AT_UID, val: 0);
236 add_auxv_entry(var: &info->auxv, AT_EUID, val: 0);
237 add_auxv_entry(var: &info->auxv, AT_GID, val: 0);
238 add_auxv_entry(var: &info->auxv, AT_EGID, val: 0);
239 add_auxv_entry(var: &info->auxv, AT_BASE, MOS_ELF_INTERPRETER_BASE_OFFSET);
240
241 // !! after this point, we must make sure that we switch back to the previous address space before returning from this function !!
242 mm_context_t *const prev_mm = mm_switch_context(new_ctx: proc->mm);
243
244 bool should_bias = elf.object_type == ET_DYN; // only ET_DYN (shared libraries) needs randomization
245 ptrdiff_t map_bias = 0; // ELF segments are loaded at vaddr + load_bias
246
247 bool has_interpreter = false;
248 ptr_t interp_entrypoint = 0;
249 ptr_t auxv_phdr_vaddr = false; // whether we need to add AT_PHDR, AT_PHENT, AT_PHNUM to the auxv vector
250
251 for (size_t i = 0; i < elf.ph.count; i++)
252 {
253 elf_program_hdr_t ph;
254 if (!elf_read_file(file, buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size))
255 {
256 pr_emerg("failed to read program header %zu for '%s'", i, dentry_name(file->dentry));
257 goto bad_proc;
258 }
259
260 switch (ph.header_type)
261 {
262 case ELF_PT_NULL: break; // ignore
263 case ELF_PT_INTERP:
264 {
265 char interp_name[ph.size_in_file];
266 if (!elf_read_file(file, buf: interp_name, offset: ph.data_offset, size: ph.size_in_file))
267 {
268 pr_emerg("failed to read interpreter name for '%s'", dentry_name(file->dentry));
269 goto bad_proc;
270 }
271 pr_dinfo2(elf, "elf interpreter: %s", interp_name);
272 has_interpreter = true;
273 interp_entrypoint = elf_map_interpreter(path: interp_name, mm: proc->mm);
274 if (!interp_entrypoint)
275 {
276 pr_dinfo2(elf, "failed to map interpreter '%s'", interp_name);
277 goto bad_proc;
278 }
279
280 if (should_bias)
281 map_bias = elf_determine_loadbias(elf: &elf);
282
283 break;
284 }
285 case ELF_PT_LOAD:
286 {
287 elf_map_segment(ph: &ph, map_bias, mm: proc->mm, file);
288 break;
289 }
290 case ELF_PT_PHDR:
291 {
292 auxv_phdr_vaddr = ph.vaddr;
293 break;
294 }
295
296 case ELF_PT_NOTE: break; // intentionally ignored
297 case ELF_PT_DYNAMIC: break; // will be handled by the dynamic linker
298 case ELF_PT_TLS: break; // will be handled by the dynamic linker or libc
299 default:
300 {
301 if (MOS_IN_RANGE(ph.header_type, ELF_PT_OS_LOW, ELF_PT_OS_HIGH))
302 pr_dinfo2(elf, "ignoring OS-specific program header type 0x%x", ph.header_type);
303 else if (MOS_IN_RANGE(ph.header_type, ELF_PT_PROCESSOR_LO, ELF_PT_PROCESSOR_HI))
304 pr_dinfo2(elf, "ignoring processor-specific program header type 0x%x", ph.header_type);
305 else
306 pr_warn("unknown program header type 0x%x", ph.header_type);
307 break;
308 }
309 };
310 }
311
312 if (auxv_phdr_vaddr)
313 {
314 add_auxv_entry(var: &info->auxv, AT_PHDR, val: map_bias + auxv_phdr_vaddr);
315 add_auxv_entry(var: &info->auxv, AT_PHENT, val: elf.ph.entry_size);
316 add_auxv_entry(var: &info->auxv, AT_PHNUM, val: elf.ph.count);
317 }
318
319 add_auxv_entry(var: &info->auxv, AT_ENTRY, val: map_bias + elf.entry_point); // the entry point of the executable, not the interpreter
320
321 ptr_t user_argv, user_envp;
322 thread_t *const main_thread = proc->main_thread;
323 elf_setup_main_thread(thread: main_thread, info, out_pargv: &user_argv, out_penvp: &user_envp);
324 platform_context_setup_main_thread(thread: main_thread, entry: has_interpreter ? interp_entrypoint : elf.entry_point, sp: main_thread->u_stack.head, argc: info->argc, argv: user_argv, envp: user_envp);
325
326 goto done;
327
328bad_proc:
329 ret = false;
330
331done:;
332 mm_context_t *prev = mm_switch_context(new_ctx: prev_mm);
333 MOS_UNUSED(prev);
334
335 return ret;
336}
337
338bool elf_read_and_verify_executable(file_t *file, elf_header_t *header)
339{
340 if (!elf_read_file(file, buf: header, offset: 0, size: sizeof(elf_header_t)))
341 return false;
342
343 const bool valid = elf_verify_header(header);
344 if (!valid)
345 return false;
346
347 if (header->object_type != ET_EXEC && header->object_type != ET_DYN)
348 return false;
349
350 return true;
351}
352
353bool elf_fill_process(process_t *proc, file_t *file, const char *path, const char *const argv[], const char *const envp[])
354{
355 bool ret = false;
356
357 io_ref(io: &file->io);
358
359 elf_header_t elf;
360 if (!elf_read_and_verify_executable(file, header: &elf))
361 {
362 pr_emerg("failed to verify ELF header for '%s'", dentry_name(file->dentry));
363 goto cleanup_close_file;
364 }
365
366 int argc = 0;
367 while (argv && argv[argc] != NULL)
368 argc++;
369
370 int envc = 0;
371 while (envp && envp[envc] != NULL)
372 envc++;
373
374 elf_startup_info_t info = {
375 .invocation = strdup(src: path),
376 .argc = argc,
377 .argv = kmalloc(sizeof(char *) * (argc + 1)),
378 .envc = envc,
379 .envp = kmalloc(sizeof(char *) * (envc + 1)),
380 .auxv = { 0 },
381 };
382
383 for (int i = 0; i < argc; i++)
384 info.argv[i] = strdup(src: argv[i]); // copy the strings to kernel space, since we are switching to a new address space
385 info.argv[argc] = NULL;
386
387 for (int i = 0; i < envc; i++)
388 info.envp[i] = strdup(src: envp[i]); // copy the strings to kernel space, since we are switching to a new address space
389 info.envp[envc] = NULL;
390
391 ret = elf_do_fill_process(proc, file, elf, info: &info);
392
393 if (info.invocation)
394 kfree(ptr: info.invocation);
395 for (int i = 0; i < argc; i++)
396 kfree(ptr: info.argv[i]);
397 for (int i = 0; i < envc; i++)
398 kfree(ptr: info.envp[i]);
399 kfree(ptr: info.argv);
400 kfree(ptr: info.envp);
401
402cleanup_close_file:
403 io_unref(io: &file->io); // close the file, we should have the file's refcount == 0 here
404
405 return ret;
406}
407
408process_t *elf_create_process(const char *path, process_t *parent, const char *const argv[], const char *const envp[], const stdio_t *ios)
409{
410 process_t *proc = NULL;
411 file_t *file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE);
412 if (IS_ERR(ptr: file))
413 {
414 mos_warn("failed to open '%s'", path);
415 return NULL;
416 }
417 io_ref(io: &file->io);
418
419 proc = process_new(parent, name: file->dentry->name, ios);
420 if (!proc)
421 {
422 mos_warn("failed to create process for '%s'", dentry_name(file->dentry));
423 goto cleanup_close_file;
424 }
425
426 const bool filled = elf_fill_process(proc, file, path, argv, envp);
427 thread_complete_init(thread: proc->main_thread);
428 scheduler_add_thread(thread: proc->main_thread);
429
430 if (!filled)
431 {
432 // TODO how do we make sure that the process is cleaned up properly?
433 process_exit(process: proc, exit_code: 0, SIGKILL);
434 proc = NULL;
435 }
436
437cleanup_close_file:
438 io_unref(io: &file->io); // close the file, we should have the file's refcount == 0 here
439 return proc;
440}
441