1// SPDX-License-Identifier: GPL-3.0-or-later
2
3#include "mos/tasks/elf.hpp"
4
5#include "mos/filesystem/vfs.hpp"
6#include "mos/mm/mm.hpp"
7#include "mos/mm/mmap.hpp"
8#include "mos/platform/platform.hpp"
9#include "mos/tasks/process.hpp"
10#include "mos/tasks/schedule.hpp"
11#include "mos/tasks/task_types.hpp"
12#include "mos/tasks/thread.hpp"
13
14#include <elf.h>
15#include <mos/types.hpp>
16#include <mos/vector.hpp>
17#include <mos_stdlib.hpp>
18#include <mos_string.hpp>
19
// Compile-time layout checks: the loader reads these structures directly from the file,
// so elf_header_t must be exactly 0x40 bytes and elf_program_hdr_t exactly 0x38 bytes.
MOS_STATIC_ASSERT(sizeof(elf_header_t) == 0x40, "elf_header has wrong size");
MOS_STATIC_ASSERT(sizeof(elf_program_hdr_t) == 0x38, "elf_program_header has wrong size");
22
23static bool elf_verify_header(const elf_header_t *header)
24{
25 if (header->identity.magic[0] != ELFMAG0)
26 return false;
27
28 if (strncmp(str1: &header->identity.magic[1], str2: "ELF", n: 3) != 0)
29 return false;
30
31 if (header->identity.bits != ELFCLASS64)
32 return false;
33
34 if (header->identity.endianness != ELF_ENDIANNESS_MOS_DEFAULT)
35 return false;
36
37 if (header->identity.osabi != 0)
38 return false;
39
40 if (header->identity.version != EV_CURRENT)
41 return false;
42
43 if (header->machine_type != MOS_ELF_PLATFORM)
44 return false;
45
46 return true;
47}
48
49[[nodiscard]] static bool elf_read_file(FsBaseFile *file, void *buf, off_t offset, size_t size)
50{
51 const size_t read = file->pread(buf, count: size, offset);
52 return read == size;
53}
54
55static ptr_t elf_determine_loadbias(elf_header_t *elf)
56{
57 MOS_UNUSED(elf);
58 return 0x4000000; // TODO: randomize
59}
60
/**
 * Typical Stack Layout:
 *
 * (low address)
 *      |-> uintn argc
 *      |-> ptr_t argv[]
 *      |   |-> NULL
 *      |-> ptr_t envp[]
 *      |   |-> NULL
 *      |-> AuxV
 *      |   |-> AT_...
 *      |   |-> AT_NULL
 *      |-> argv strings, NULL-terminated
 *      |-> environment strings, NULL-terminated
 *      |-> invocation string (referenced by AT_EXECFN), NULL-terminated
 *      |-> uintn zero
 *      |-> padding, so that the final stack pointer is 16-byte aligned
 * (high address, end of stack)
 */
78
79static void elf_setup_main_thread(Thread *thread, elf_startup_info_t *const info, ptr_t *const out_pargv, ptr_t *const out_penvp)
80{
81 dInfo2<elf> << "cpu " << current_cpu->id << ": setting up a new main thread " << thread << " of process " << thread->owner;
82
83 MOS_ASSERT_X(thread->u_stack.head == thread->u_stack.top, "thread %pt's user stack is not empty", thread);
84 stack_push_val(&thread->u_stack, (uintn) 0);
85
86 const void *stack_envp[info->envp.size() + 1]; // +1 for the null terminator
87 const void *stack_argv[info->argv.size() + 1]; // +1 for the null terminator
88
89 // calculate the size of entire stack usage
90 size_t stack_size = 0;
91 stack_size += sizeof(uintn); // the topmost zero
92 stack_size += info->invocation.size() + 1; // +1 for the null terminator
93
94 for (const auto &env : info->envp)
95 stack_size += env.size() + 1; // +1 for the null terminator
96
97 for (const auto &arg : info->argv)
98 stack_size += arg.size() + 1; // +1 for the null terminator
99
100 stack_size += sizeof(Elf64_auxv_t) * (info->auxv.size() + 2); // AT_EXECFN and AT_NULL
101 stack_size += sizeof(stack_envp); // envp
102 stack_size += sizeof(stack_argv); // argv
103 stack_size += sizeof(uintn); // argc
104
105 // align to 16 bytes
106 const size_t aligned_stack_size = ALIGN_UP(stack_size, 16);
107 thread->u_stack.head = thread->u_stack.top - (aligned_stack_size - stack_size); // so that the stack can be aligned to 16 bytes
108
109 stack_push_val(&thread->u_stack, (uintn) 0);
110
111 void *invocation_ptr = stack_push(stack: &thread->u_stack, data: info->invocation.data(), size: info->invocation.size() + 1); // +1 for the null terminator
112
113 info->AddAuxvEntry(AT_EXECFN, val: (ptr_t) invocation_ptr);
114 info->AddAuxvEntry(AT_NULL, val: 0);
115
116 // ! copy the environment to the stack in reverse order !
117 if (info->envp.empty())
118 goto no_envp;
119
120 for (int i = info->envp.size() - 1; i >= 0; i--)
121 {
122 const size_t len = info->envp[i].size() + 1; // +1 for the null terminator
123 stack_envp[i] = stack_push(stack: &thread->u_stack, data: info->envp[i].c_str(), size: len);
124 }
125
126no_envp:
127 stack_envp[info->envp.size()] = NULL;
128
129 // ! copy the argv to the stack in reverse order !
130 if (info->argv.empty())
131 goto no_argv;
132
133 for (int i = info->argv.size() - 1; i >= 0; i--)
134 {
135 const size_t len = info->argv[i].size() + 1; // +1 for the null terminator
136 stack_argv[i] = stack_push(stack: &thread->u_stack, data: info->argv[i].c_str(), size: len);
137 }
138
139no_argv:
140 stack_argv[info->argv.size()] = NULL;
141
142 stack_push(stack: &thread->u_stack, Self&: data: info->auxv.data(), size: sizeof(Elf64_auxv_t) * info->auxv.size()); // auxv
143 *out_penvp = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_envp, size: sizeof(char *) * (info->envp.size() + 1)); // envp
144 *out_pargv = (ptr_t) stack_push(stack: &thread->u_stack, data: &stack_argv, size: sizeof(char *) * (info->argv.size() + 1)); // argv
145 stack_push_val(&thread->u_stack, (uintn) info->argv.size()); // argc
146 MOS_ASSERT(thread->u_stack.head % 16 == 0);
147}
148
149static void elf_map_segment(const elf_program_hdr_t *const ph, ptr_t map_bias, MMContext *mm, FsBaseFile *file)
150{
151 MOS_ASSERT(ph->header_type == ELF_PT_LOAD);
152 dInfo2<elf> << "program header " //
153 << (ph->flags() & ELF_PF_R ? 'r' : '-') //
154 << (ph->flags() & ELF_PF_W ? 'w' : '-') //
155 << (ph->flags() & ELF_PF_X ? 'x' : '-') //
156 << ", type '" << ph->header_type << "' at " << ph->vaddr;
157
158 MOS_ASSERT(ph->data_offset % MOS_PAGE_SIZE == ph->vaddr % MOS_PAGE_SIZE); // offset ≡ vaddr (mod page size)
159 MOS_ASSERT_X(ph->size_in_file <= ph->size_in_mem, "invalid ELF: size in file is larger than size in memory");
160
161 const VMFlags flags = [pflags = ph->flags()]()
162 {
163 VMFlags f = VM_USER;
164 if (pflags & ELF_PF_R)
165 f |= VM_READ;
166 if (pflags & ELF_PF_W)
167 f |= VM_WRITE;
168 if (pflags & ELF_PF_X)
169 f |= VM_EXEC;
170 return f;
171 }();
172
173 const ptr_t aligned_vaddr = ALIGN_DOWN_TO_PAGE(ph->vaddr);
174 const size_t npages = (ALIGN_UP_TO_PAGE(ph->vaddr + ph->size_in_mem) - aligned_vaddr) / MOS_PAGE_SIZE;
175 const size_t aligned_size = ALIGN_DOWN_TO_PAGE(ph->data_offset);
176
177 const ptr_t map_start = map_bias + aligned_vaddr;
178 dInfo2<elf> << " mapping " << npages << " pages at " << map_start << " (bias at " << map_bias << ") from offset " << aligned_size << "...";
179
180 const ptr_t vaddr = mmap_file(ctx: mm, hint_addr: map_start, flags: MMAP_PRIVATE | MMAP_EXACT, VMFlags: flags, n_pages: npages, io: file, offset: aligned_size);
181 MOS_ASSERT_X(vaddr == map_start, "failed to map ELF segment at " PTR_FMT, aligned_vaddr);
182
183 if (ph->size_in_file < ph->size_in_mem)
184 {
185 dInfo2<elf> << " ... and zeroing " << (ph->size_in_mem - ph->size_in_file) << " bytes at " << (map_bias + ph->vaddr + ph->size_in_file);
186 memzero(s: (char *) map_bias + ph->vaddr + ph->size_in_file, n: ph->size_in_mem - ph->size_in_file);
187 }
188
189 dInfo2<elf> << " ... done";
190}
191
192static ptr_t elf_map_interpreter(const char *path, MMContext *mm)
193{
194 auto interp_file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE);
195 if (interp_file.isErr())
196 return 0;
197
198 interp_file->ref();
199
200 elf_header_t elf;
201 if (!elf_read_and_verify_executable(file: interp_file.get(), header: &elf))
202 {
203 mEmerg << "failed to verify ELF header for '" << dentry_name(dentry: interp_file->dentry) << "'";
204 interp_file->unref();
205 return 0;
206 }
207
208 ptr_t entry = 0;
209
210 for (size_t i = 0; i < elf.ph.count; i++)
211 {
212 elf_program_hdr_t ph;
213 if (!elf_read_file(file: interp_file.get(), buf: &ph, offset: elf.ph_offset + i * elf.ph.entry_size, size: elf.ph.entry_size))
214 {
215 mEmerg << "failed to read program header " << i << " for '" << dentry_name(dentry: interp_file->dentry) << "'";
216 interp_file->unref();
217 return 0;
218 }
219
220 if (ph.header_type == ELF_PT_LOAD)
221 {
222 // interpreter is always loaded at vaddr 0
223 elf_map_segment(ph: &ph, MOS_ELF_INTERPRETER_BASE_OFFSET, mm, file: interp_file.get());
224 entry = elf.entry_point;
225 }
226 }
227
228 interp_file->unref();
229 return MOS_ELF_INTERPRETER_BASE_OFFSET + entry;
230}
231
232__nodiscard bool elf_do_fill_process(Process *proc, FsBaseFile *file, elf_header_t header, elf_startup_info_t *info)
233{
234 bool ret = true;
235
236 info->AddAuxvEntry(AT_PAGESZ, MOS_PAGE_SIZE);
237 info->AddAuxvEntry(AT_UID, val: 0);
238 info->AddAuxvEntry(AT_EUID, val: 0);
239 info->AddAuxvEntry(AT_GID, val: 0);
240 info->AddAuxvEntry(AT_EGID, val: 0);
241 info->AddAuxvEntry(AT_BASE, MOS_ELF_INTERPRETER_BASE_OFFSET);
242
243 // !! after this point, we must make sure that we switch back to the previous address space before returning from this function !!
244 MMContext *const prev_mm = mm_switch_context(new_ctx: proc->mm);
245
246 bool should_bias = header.object_type == ET_DYN; // only ET_DYN (shared libraries) needs randomization
247 ptrdiff_t map_bias = 0; // ELF segments are loaded at vaddr + load_bias
248
249 bool has_interpreter = false;
250 ptr_t interp_entrypoint = 0;
251 ptr_t auxv_phdr_vaddr = false; // whether we need to add AT_PHDR, AT_PHENT, AT_PHNUM to the auxv vector
252
253 for (size_t i = 0; i < header.ph.count; i++)
254 {
255 elf_program_hdr_t ph;
256 if (!elf_read_file(file, buf: &ph, offset: header.ph_offset + i * header.ph.entry_size, size: header.ph.entry_size))
257 {
258 mEmerg << "failed to read program header " << i << " for '" << dentry_name(dentry: file->dentry) << "'";
259 const auto prev = mm_switch_context(new_ctx: prev_mm);
260 (void) prev;
261 return false;
262 }
263
264 switch (ph.header_type)
265 {
266 case ELF_PT_NULL: break; // ignore
267 case ELF_PT_INTERP:
268 {
269 char interp_name[ph.size_in_file];
270 if (!elf_read_file(file, buf: interp_name, offset: ph.data_offset, size: ph.size_in_file))
271 {
272 mEmerg << "failed to read interpreter name for '" << dentry_name(dentry: file->dentry) << "'";
273 const auto prev = mm_switch_context(new_ctx: prev_mm);
274 (void) prev;
275 return false;
276 }
277 dInfo2<elf> << "elf interpreter: " << interp_name;
278 has_interpreter = true;
279 interp_entrypoint = elf_map_interpreter(path: interp_name, mm: proc->mm);
280 if (!interp_entrypoint)
281 {
282 dInfo2<elf> << "failed to map interpreter '" << interp_name << "'";
283 const auto prev = mm_switch_context(new_ctx: prev_mm);
284 (void) prev;
285 return false;
286 }
287
288 if (should_bias)
289 map_bias = elf_determine_loadbias(elf: &header);
290
291 break;
292 }
293 case ELF_PT_LOAD:
294 {
295 elf_map_segment(ph: &ph, map_bias, mm: proc->mm, file);
296 break;
297 }
298 case ELF_PT_PHDR:
299 {
300 auxv_phdr_vaddr = ph.vaddr;
301 break;
302 }
303
304 case ELF_PT_NOTE: break; // intentionally ignored
305 case ELF_PT_DYNAMIC: break; // will be handled by the dynamic linker
306 case ELF_PT_TLS: break; // will be handled by the dynamic linker or libc
307 default:
308 {
309 if (MOS_IN_RANGE(ph.header_type, ELF_PT_OS_LOW, ELF_PT_OS_HIGH))
310 dInfo2<elf> << "ignoring OS-specific program header type 0x" << ph.header_type;
311 else if (MOS_IN_RANGE(ph.header_type, ELF_PT_PROCESSOR_LO, ELF_PT_PROCESSOR_HI))
312 dInfo2<elf> << "ignoring processor-specific program header type 0x" << ph.header_type;
313 else
314 mWarn << "unknown program header type 0x" << ph.header_type;
315 break;
316 }
317 };
318 }
319
320 if (auxv_phdr_vaddr)
321 {
322 info->AddAuxvEntry(AT_PHDR, val: map_bias + auxv_phdr_vaddr);
323 info->AddAuxvEntry(AT_PHENT, val: header.ph.entry_size);
324 info->AddAuxvEntry(AT_PHNUM, val: header.ph.count);
325 }
326
327 info->AddAuxvEntry(AT_ENTRY, val: map_bias + header.entry_point); // the entry point of the executable, not the interpreter
328
329 ptr_t user_argv, user_envp;
330 const auto main_thread = proc->main_thread;
331 elf_setup_main_thread(thread: main_thread, info, out_pargv: &user_argv, out_penvp: &user_envp);
332 platform_context_setup_main_thread( //
333 thread: main_thread, //
334 entry: has_interpreter ? interp_entrypoint : header.entry_point, //
335 sp: main_thread->u_stack.head, //
336 argc: info->argv.size(), //
337 argv: user_argv, //
338 envp: user_envp //
339 );
340
341 MMContext *prev = mm_switch_context(new_ctx: prev_mm);
342 MOS_UNUSED(prev);
343
344 return ret;
345}
346
347bool elf_read_and_verify_executable(FsBaseFile *file, elf_header_t *header)
348{
349 if (!elf_read_file(file, buf: header, offset: 0, size: sizeof(elf_header_t)))
350 return false;
351
352 const bool valid = elf_verify_header(header);
353 if (!valid)
354 return false;
355
356 if (header->object_type != ET_EXEC && header->object_type != ET_DYN)
357 return false;
358
359 return true;
360}
361
362[[nodiscard]] static bool elf_fill_process(Process *proc, FsBaseFile *file, mos::string_view path, const mos::vector<mos::string> &argv,
363 const mos::vector<mos::string> &envp)
364{
365 bool ret = false;
366
367 file->ref();
368
369 elf_header_t elf;
370 if (!elf_read_and_verify_executable(file, header: &elf))
371 {
372 mEmerg << "failed to verify ELF header for '" << dentry_name(dentry: file->dentry) << "'";
373 file->unref(); // close the file, we should have the file's refcount == 0 here
374 return ret;
375 }
376
377 elf_startup_info_t info{ .invocation = path, .argv = argv, .envp = envp };
378 ret = elf_do_fill_process(proc, file, header: elf, info: &info);
379
380 file->unref(); // close the file, we should have the file's refcount == 0 here
381 return ret;
382}
383
384Process *elf_create_process(mos::string_view path, Process *parent, const mos::vector<mos::string> &argv, const mos::vector<mos::string> &envp, const stdio_t *ios)
385{
386 auto file = vfs_openat(AT_FDCWD, path, flags: OPEN_READ | OPEN_EXECUTE);
387 if (file.isErr())
388 {
389 mos_warn("failed to open '%s'", path.data());
390 return NULL;
391 }
392 file->ref();
393
394 auto proc = process_new(parelagsnt: parent, name: file->dentry->name, ios);
395 if (!proc)
396 {
397 mos_warn("failed to create process for '%s'", dentry_name(file->dentry).c_str());
398 file->unref();
399 return proc;
400 }
401
402 const bool filled = elf_fill_process(proc, file: file.get(), path, argv, envp);
403 thread_complete_init(thread: proc->main_thread);
404 scheduler_add_thread(thread: proc->main_thread);
405
406 if (!filled)
407 {
408 // TODO how do we make sure that the process is cleaned up properly?
409 process_exit(proc: std::move(t&: proc), exit_code: 0, SIGKILL);
410 proc = NULL;
411 }
412
413 file->unref(); // close the file, we should have the file's refcount == 0 here
414 return proc;
415}
416