1 | // SPDX-License-Identifier: GPL-3.0-or-later |
2 | |
3 | #include "mos/tasks/elf.h" |
4 | |
5 | #include "mos/filesystem/vfs.h" |
6 | #include "mos/mm/mmap.h" |
7 | #include "mos/platform/platform.h" |
8 | #include "mos/syslog/printk.h" |
9 | #include "mos/tasks/process.h" |
10 | #include "mos/tasks/schedule.h" |
11 | #include "mos/tasks/task_types.h" |
12 | #include "mos/tasks/thread.h" |
13 | |
14 | #include <mos/types.h> |
15 | #include <mos_stdlib.h> |
16 | #include <mos_string.h> |
17 | |
18 | MOS_STATIC_ASSERT(sizeof(elf_header_t) == 0x40, "elf_header has wrong size" ); |
19 | MOS_STATIC_ASSERT(sizeof(elf_program_hdr_t) == 0x38, "elf_program_header has wrong size" ); |
20 | |
21 | static void add_auxv_entry(auxv_vec_t *var, unsigned long type, unsigned long val) |
22 | { |
23 | MOS_ASSERT_X(var->count < AUXV_VEC_SIZE, "auxv vector overflow, increase AUXV_VEC_SIZE" ); |
24 | |
25 | var->vector[var->count].a_type = type; |
26 | var->vector[var->count].a_un.a_val = val; |
27 | var->count++; |
28 | } |
29 | |
30 | static bool (const elf_header_t *) |
31 | { |
32 | if (header->identity.magic[0] != ELFMAG0) |
33 | return false; |
34 | |
35 | if (strncmp(str1: &header->identity.magic[1], str2: "ELF" , n: 3) != 0) |
36 | return false; |
37 | |
38 | if (header->identity.bits != ELFCLASS64) |
39 | return false; |
40 | |
41 | if (header->identity.endianness != ELF_ENDIANNESS_MOS_DEFAULT) |
42 | return false; |
43 | |
44 | if (header->identity.osabi != 0) |
45 | return false; |
46 | |
47 | if (header->identity.version != EV_CURRENT) |
48 | return false; |
49 | |
50 | if (header->machine_type != MOS_ELF_PLATFORM) |
51 | return false; |
52 | |
53 | return true; |
54 | } |
55 | |
56 | [[nodiscard]] static bool elf_read_file(file_t *file, void *buf, off_t offset, size_t size) |
57 | { |
58 | const size_t read = io_pread(io: &file->io, buf, count: size, offset); |
59 | return read == size; |
60 | } |
61 | |
62 | static ptr_t elf_determine_loadbias(elf_header_t *elf) |
63 | { |
64 | MOS_UNUSED(elf); |
65 | return 0x4000000; // TODO: randomize |
66 | } |
67 | |
/**
 * Typical Stack Layout:
 *
 * (low address)
 *      |-> uintn argc
 *      |-> ptr_t argv[]
 *      |   |-> NULL
 *      |-> ptr_t envp[]
 *      |   |-> NULL
 *      |-> AuxV
 *      |   |-> AT_...
 *      |   |-> AT_NULL
 *      |-> argv strings, NUL-terminated
 *      |-> environment strings, NUL-terminated
 *      |-> invocation path string, NUL-terminated (referenced by AT_EXECFN)
 *      |-> uintn zero
 *      |-> padding, so that the final stack pointer is 16-byte aligned
 * (high address, end of stack)
 */
85 | |
86 | static void elf_setup_main_thread(thread_t *thread, elf_startup_info_t *const info, ptr_t *const out_pargv, ptr_t *const out_penvp) |
87 | { |
88 | pr_dinfo2(elf, "cpu %d: setting up a new main thread %pt of process %pp" , current_cpu->id, (void *) thread, (void *) thread->owner); |
89 | |
90 | MOS_ASSERT_X(thread->u_stack.head == thread->u_stack.top, "thread %pt's user stack is not empty" , (void *) thread); |
91 | stack_push_val(&thread->u_stack, (uintn) 0); |
92 | |
93 | const void *stack_envp[info->envc + 1]; // +1 for the null terminator |
94 | const void *stack_argv[info->argc + 1]; // +1 for the null terminator |
95 | |
96 | // calculate the size of entire stack usage |
97 | size_t stack_size = 0; |
98 | stack_size += sizeof(uintn); // the topmost zero |
99 | stack_size += strlen(str: info->invocation) + 1; // +1 for the null terminator |
100 | |
101 | for (int i = 0; i < info->envc; i++) |
102 | stack_size += strlen(str: info->envp[i]) + 1; // +1 for the null terminator |
103 | |
104 | for (int i = 0; i < info->argc; i++) |
105 | stack_size += strlen(str: info->argv[i]) + 1; // +1 for the null terminator |
106 | |
107 | stack_size += sizeof(Elf64_auxv_t) * (info->auxv.count + 2); // AT_EXECFN and AT_NULL |
108 | stack_size += sizeof(stack_envp); // envp |
109 | stack_size += sizeof(stack_argv); // argv |
110 | stack_size += sizeof(uintn); // argc |
111 | |
112 | // align to 16 bytes |
113 | const size_t aligned_stack_size = ALIGN_UP(stack_size, 16); |
114 | thread->u_stack.head = thread->u_stack.top - (aligned_stack_size - stack_size); // so that the stack can be aligned to 16 bytes |
115 | |
116 | stack_push_val(&thread->u_stack, (uintn) 0); |
117 | |
118 | void *invocation_ptr = stack_push(stack: &thread->u_stack, data: info->invocation, size: strlen(str: info->invocation) + 1); // +1 for the null terminator |
119 | |
120 | add_auxv_entry(var: &info->auxv, AT_EXECFN, val: (ptr_t) invocation_ptr); |
121 | add_auxv_entry(var: &info->auxv, AT_NULL, val: 0); |
122 | |
123 | // ! copy the environment to the stack in reverse order ! |
124 | if (info->envc == 0) |
125 | goto no_envp; |
126 | |
127 | for (int i = info->envc - 1; i >= 0; i--) |
128 | { |
129 | const size_t len = strlen(str: info->envp[i]) + 1; // +1 for the null terminator |
130 | stack_envp[i] = stack_push(stack: &thread->u_stack, data: info->envp[i], size: len); |
131 | } |
132 | |
133 | no_envp: |
134 | stack_envp[info->envc] = NULL; |
135 | |
136 | // ! copy the argv to the stack in reverse order ! |
137 | if (info->argc == 0) |
138 | goto no_argv; |
139 | |
140 | for (int i = info->argc - 1; i >= 0; i--) |
141 | { |
142 | const size_t len = strlen(str: info->argv[i]) + 1; // +1 for the null terminator |
143 | stack_argv[i] = stack_push(stack: &thread->u_stack, data: info->argv[i], size: len); |
144 | } |
145 | |
146 | no_argv: |
147 | stack_argv[info->argc] = NULL; |
148 | |
149 | stack_push(stack: &thread->u_stack, data: info->auxv.vector, size: sizeof(Elf64_auxv_t) * info->auxv.count); // auxv |
150 | *out_penvp = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_envp, size: sizeof(char *) * (info->envc + 1)); // envp |
151 | *out_pargv = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_argv, size: sizeof(char *) * (info->argc + 1)); // argv |
152 | stack_push_val(&thread->u_stack, (uintn) info->argc); // argc |
153 | MOS_ASSERT(thread->u_stack.head % 16 == 0); |
154 | } |
155 | |
156 | static void elf_map_segment(const elf_program_hdr_t *const ph, ptr_t map_bias, mm_context_t *mm, file_t *file) |
157 | { |
158 | MOS_ASSERT(ph->header_type == ELF_PT_LOAD); |
159 | pr_dinfo2(elf, "program header %c%c%c, type '%d' at " PTR_FMT, // |
160 | ph->p_flags & ELF_PF_R ? 'r' : '-', // |
161 | ph->p_flags & ELF_PF_W ? 'w' : '-', // |
162 | ph->p_flags & ELF_PF_X ? 'x' : '-', // |
163 | ph->header_type, // |
164 | ph->vaddr // |
165 | ); |
166 | |
167 | MOS_ASSERT(ph->data_offset % MOS_PAGE_SIZE == ph->vaddr % MOS_PAGE_SIZE); // offset ≡ vaddr (mod page size) |
168 | MOS_ASSERT_X(ph->size_in_file <= ph->size_in_mem, "invalid ELF: size in file is larger than size in memory" ); |
169 | |
170 | const vm_flags flags = VM_USER | (ph->p_flags & ELF_PF_R ? VM_READ : 0) | (ph->p_flags & ELF_PF_W ? VM_WRITE : 0) | (ph->p_flags & ELF_PF_X ? VM_EXEC : 0); |
171 | const ptr_t aligned_vaddr = ALIGN_DOWN_TO_PAGE(ph->vaddr); |
172 | const size_t npages = (ALIGN_UP_TO_PAGE(ph->vaddr + ph->size_in_mem) - aligned_vaddr) / MOS_PAGE_SIZE; |
173 | const size_t aligned_size = ALIGN_DOWN_TO_PAGE(ph->data_offset); |
174 | |
175 | const ptr_t map_start = map_bias + aligned_vaddr; |
176 | pr_dinfo2(elf, " mapping %zu pages at " PTR_FMT " (bias at " PTR_FMT ") from offset %zu..." , npages, map_start, map_bias, aligned_size); |
177 | |
178 | const ptr_t vaddr = mmap_file(ctx: mm, hint_addr: map_start, flags: MMAP_PRIVATE | MMAP_EXACT, vm_flags: flags, n_pages: npages, io: &file->io, offset: aligned_size); |
179 | MOS_ASSERT_X(vaddr == map_start, "failed to map ELF segment at " PTR_FMT, aligned_vaddr); |
180 | |
181 | if (ph->size_in_file < ph->size_in_mem) |
182 | { |
183 | pr_dinfo2(elf, " ... and zeroing %zu bytes at " PTR_FMT, ph->size_in_mem - ph->size_in_file, map_bias + ph->vaddr + ph->size_in_file); |
184 | memzero(s: (char *) map_bias + ph->vaddr + ph->size_in_file, n: ph->size_in_mem - ph->size_in_file); |
185 | } |
186 | |
187 | pr_dinfo2(elf, " ... done" ); |
188 | } |
189 | |
190 | static ptr_t elf_map_interpreter(const char *path, mm_context_t *mm) |
191 | { |
192 | file_t *const interp_file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE); |
193 | if (IS_ERR(ptr: interp_file)) |
194 | return 0; |
195 | |
196 | io_ref(io: &interp_file->io); |
197 | |
198 | elf_header_t elf; |
199 | if (!elf_read_and_verify_executable(file: interp_file, header: &elf)) |
200 | { |
201 | pr_emerg("failed to verify ELF header for '%s'" , dentry_name(interp_file->dentry)); |
202 | io_unref(io: &interp_file->io); |
203 | return 0; |
204 | } |
205 | |
206 | ptr_t entry = 0; |
207 | |
208 | for (size_t i = 0; i < elf.ph.count; i++) |
209 | { |
210 | elf_program_hdr_t ph; |
211 | if (!elf_read_file(file: interp_file, buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size)) |
212 | { |
213 | pr_emerg("failed to read program header %zu for '%s'" , i, dentry_name(interp_file->dentry)); |
214 | io_unref(io: &interp_file->io); |
215 | return 0; |
216 | } |
217 | |
218 | if (ph.header_type == ELF_PT_LOAD) |
219 | { |
220 | // interpreter is always loaded at vaddr 0 |
221 | elf_map_segment(ph: &ph, MOS_ELF_INTERPRETER_BASE_OFFSET, mm, file: interp_file); |
222 | entry = elf.entry_point; |
223 | } |
224 | } |
225 | |
226 | io_unref(io: &interp_file->io); |
227 | return MOS_ELF_INTERPRETER_BASE_OFFSET + entry; |
228 | } |
229 | |
230 | __nodiscard bool elf_do_fill_process(process_t *proc, file_t *file, elf_header_t elf, elf_startup_info_t *info) |
231 | { |
232 | bool ret = true; |
233 | |
234 | add_auxv_entry(var: &info->auxv, AT_PAGESZ, MOS_PAGE_SIZE); |
235 | add_auxv_entry(var: &info->auxv, AT_UID, val: 0); |
236 | add_auxv_entry(var: &info->auxv, AT_EUID, val: 0); |
237 | add_auxv_entry(var: &info->auxv, AT_GID, val: 0); |
238 | add_auxv_entry(var: &info->auxv, AT_EGID, val: 0); |
239 | add_auxv_entry(var: &info->auxv, AT_BASE, MOS_ELF_INTERPRETER_BASE_OFFSET); |
240 | |
241 | // !! after this point, we must make sure that we switch back to the previous address space before returning from this function !! |
242 | mm_context_t *const prev_mm = mm_switch_context(new_ctx: proc->mm); |
243 | |
244 | bool should_bias = elf.object_type == ET_DYN; // only ET_DYN (shared libraries) needs randomization |
245 | ptrdiff_t map_bias = 0; // ELF segments are loaded at vaddr + load_bias |
246 | |
247 | bool has_interpreter = false; |
248 | ptr_t interp_entrypoint = 0; |
249 | ptr_t auxv_phdr_vaddr = false; // whether we need to add AT_PHDR, AT_PHENT, AT_PHNUM to the auxv vector |
250 | |
251 | for (size_t i = 0; i < elf.ph.count; i++) |
252 | { |
253 | elf_program_hdr_t ph; |
254 | if (!elf_read_file(file, buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size)) |
255 | { |
256 | pr_emerg("failed to read program header %zu for '%s'" , i, dentry_name(file->dentry)); |
257 | goto bad_proc; |
258 | } |
259 | |
260 | switch (ph.header_type) |
261 | { |
262 | case ELF_PT_NULL: break; // ignore |
263 | case ELF_PT_INTERP: |
264 | { |
265 | char interp_name[ph.size_in_file]; |
266 | if (!elf_read_file(file, buf: interp_name, offset: ph.data_offset, size: ph.size_in_file)) |
267 | { |
268 | pr_emerg("failed to read interpreter name for '%s'" , dentry_name(file->dentry)); |
269 | goto bad_proc; |
270 | } |
271 | pr_dinfo2(elf, "elf interpreter: %s" , interp_name); |
272 | has_interpreter = true; |
273 | interp_entrypoint = elf_map_interpreter(path: interp_name, mm: proc->mm); |
274 | if (!interp_entrypoint) |
275 | { |
276 | pr_dinfo2(elf, "failed to map interpreter '%s'" , interp_name); |
277 | goto bad_proc; |
278 | } |
279 | |
280 | if (should_bias) |
281 | map_bias = elf_determine_loadbias(elf: &elf); |
282 | |
283 | break; |
284 | } |
285 | case ELF_PT_LOAD: |
286 | { |
287 | elf_map_segment(ph: &ph, map_bias, mm: proc->mm, file); |
288 | break; |
289 | } |
290 | case ELF_PT_PHDR: |
291 | { |
292 | auxv_phdr_vaddr = ph.vaddr; |
293 | break; |
294 | } |
295 | |
296 | case ELF_PT_NOTE: break; // intentionally ignored |
297 | case ELF_PT_DYNAMIC: break; // will be handled by the dynamic linker |
298 | case ELF_PT_TLS: break; // will be handled by the dynamic linker or libc |
299 | default: |
300 | { |
301 | if (MOS_IN_RANGE(ph.header_type, ELF_PT_OS_LOW, ELF_PT_OS_HIGH)) |
302 | pr_dinfo2(elf, "ignoring OS-specific program header type 0x%x" , ph.header_type); |
303 | else if (MOS_IN_RANGE(ph.header_type, ELF_PT_PROCESSOR_LO, ELF_PT_PROCESSOR_HI)) |
304 | pr_dinfo2(elf, "ignoring processor-specific program header type 0x%x" , ph.header_type); |
305 | else |
306 | pr_warn("unknown program header type 0x%x" , ph.header_type); |
307 | break; |
308 | } |
309 | }; |
310 | } |
311 | |
312 | if (auxv_phdr_vaddr) |
313 | { |
314 | add_auxv_entry(var: &info->auxv, AT_PHDR, val: map_bias + auxv_phdr_vaddr); |
315 | add_auxv_entry(var: &info->auxv, AT_PHENT, val: elf.ph.entry_size); |
316 | add_auxv_entry(var: &info->auxv, AT_PHNUM, val: elf.ph.count); |
317 | } |
318 | |
319 | add_auxv_entry(var: &info->auxv, AT_ENTRY, val: map_bias + elf.entry_point); // the entry point of the executable, not the interpreter |
320 | |
321 | ptr_t user_argv, user_envp; |
322 | thread_t *const main_thread = proc->main_thread; |
323 | elf_setup_main_thread(thread: main_thread, info, out_pargv: &user_argv, out_penvp: &user_envp); |
324 | platform_context_setup_main_thread(thread: main_thread, entry: has_interpreter ? interp_entrypoint : elf.entry_point, sp: main_thread->u_stack.head, argc: info->argc, argv: user_argv, envp: user_envp); |
325 | |
326 | goto done; |
327 | |
328 | bad_proc: |
329 | ret = false; |
330 | |
331 | done:; |
332 | mm_context_t *prev = mm_switch_context(new_ctx: prev_mm); |
333 | MOS_UNUSED(prev); |
334 | |
335 | return ret; |
336 | } |
337 | |
338 | bool elf_read_and_verify_executable(file_t *file, elf_header_t *) |
339 | { |
340 | if (!elf_read_file(file, buf: header, offset: 0, size: sizeof(elf_header_t))) |
341 | return false; |
342 | |
343 | const bool valid = elf_verify_header(header); |
344 | if (!valid) |
345 | return false; |
346 | |
347 | if (header->object_type != ET_EXEC && header->object_type != ET_DYN) |
348 | return false; |
349 | |
350 | return true; |
351 | } |
352 | |
353 | bool elf_fill_process(process_t *proc, file_t *file, const char *path, const char *const argv[], const char *const envp[]) |
354 | { |
355 | bool ret = false; |
356 | |
357 | io_ref(io: &file->io); |
358 | |
359 | elf_header_t elf; |
360 | if (!elf_read_and_verify_executable(file, header: &elf)) |
361 | { |
362 | pr_emerg("failed to verify ELF header for '%s'" , dentry_name(file->dentry)); |
363 | goto cleanup_close_file; |
364 | } |
365 | |
366 | int argc = 0; |
367 | while (argv && argv[argc] != NULL) |
368 | argc++; |
369 | |
370 | int envc = 0; |
371 | while (envp && envp[envc] != NULL) |
372 | envc++; |
373 | |
374 | elf_startup_info_t info = { |
375 | .invocation = strdup(src: path), |
376 | .argc = argc, |
377 | .argv = kmalloc(sizeof(char *) * (argc + 1)), |
378 | .envc = envc, |
379 | .envp = kmalloc(sizeof(char *) * (envc + 1)), |
380 | .auxv = { 0 }, |
381 | }; |
382 | |
383 | for (int i = 0; i < argc; i++) |
384 | info.argv[i] = strdup(src: argv[i]); // copy the strings to kernel space, since we are switching to a new address space |
385 | info.argv[argc] = NULL; |
386 | |
387 | for (int i = 0; i < envc; i++) |
388 | info.envp[i] = strdup(src: envp[i]); // copy the strings to kernel space, since we are switching to a new address space |
389 | info.envp[envc] = NULL; |
390 | |
391 | ret = elf_do_fill_process(proc, file, elf, info: &info); |
392 | |
393 | if (info.invocation) |
394 | kfree(ptr: info.invocation); |
395 | for (int i = 0; i < argc; i++) |
396 | kfree(ptr: info.argv[i]); |
397 | for (int i = 0; i < envc; i++) |
398 | kfree(ptr: info.envp[i]); |
399 | kfree(ptr: info.argv); |
400 | kfree(ptr: info.envp); |
401 | |
402 | cleanup_close_file: |
403 | io_unref(io: &file->io); // close the file, we should have the file's refcount == 0 here |
404 | |
405 | return ret; |
406 | } |
407 | |
408 | process_t *elf_create_process(const char *path, process_t *parent, const char *const argv[], const char *const envp[], const stdio_t *ios) |
409 | { |
410 | process_t *proc = NULL; |
411 | file_t *file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE); |
412 | if (IS_ERR(ptr: file)) |
413 | { |
414 | mos_warn("failed to open '%s'" , path); |
415 | return NULL; |
416 | } |
417 | io_ref(io: &file->io); |
418 | |
419 | proc = process_new(parent, name: file->dentry->name, ios); |
420 | if (!proc) |
421 | { |
422 | mos_warn("failed to create process for '%s'" , dentry_name(file->dentry)); |
423 | goto cleanup_close_file; |
424 | } |
425 | |
426 | const bool filled = elf_fill_process(proc, file, path, argv, envp); |
427 | thread_complete_init(thread: proc->main_thread); |
428 | scheduler_add_thread(thread: proc->main_thread); |
429 | |
430 | if (!filled) |
431 | { |
432 | // TODO how do we make sure that the process is cleaned up properly? |
433 | process_exit(process: proc, exit_code: 0, SIGKILL); |
434 | proc = NULL; |
435 | } |
436 | |
437 | cleanup_close_file: |
438 | io_unref(io: &file->io); // close the file, we should have the file's refcount == 0 here |
439 | return proc; |
440 | } |
441 | |