1 | // SPDX-License-Identifier: GPL-3.0-or-later |
2 | |
3 | #include "mos/assert.hpp" |
4 | #include "mos/device/timer.hpp" |
5 | #include "mos/filesystem/inode.hpp" |
6 | #include "mos/filesystem/mount.hpp" |
7 | #include "mos/filesystem/page_cache.hpp" |
8 | #include "mos/filesystem/sysfs/sysfs.hpp" |
9 | #include "mos/filesystem/sysfs/sysfs_autoinit.hpp" |
10 | #include "mos/mm/mm.hpp" |
11 | #include "mos/mm/mmstat.hpp" |
12 | |
13 | #include <algorithm> |
14 | #include <dirent.h> |
15 | #include <errno.h> |
16 | #include <mos/filesystem/dentry.hpp> |
17 | #include <mos/filesystem/fs_types.h> |
18 | #include <mos/filesystem/vfs.hpp> |
19 | #include <mos/filesystem/vfs_types.hpp> |
20 | #include <mos/io/io.hpp> |
21 | #include <mos/lib/structures/list.hpp> |
22 | #include <mos/lib/structures/tree.hpp> |
23 | #include <mos/lib/sync/spinlock.hpp> |
24 | #include <mos/mos_global.h> |
25 | #include <mos/platform/platform.hpp> |
26 | #include <mos/syslog/printk.hpp> |
27 | #include <mos/tasks/process.hpp> |
28 | #include <mos/types.hpp> |
29 | #include <mos_stdlib.hpp> |
30 | #include <mos_string.hpp> |
31 | |
// List of registered filesystems (filesystem_t nodes). vfs_find_filesystem()
// and vfs_register_filesystem() take vfs_fs_list_lock around their accesses.
static list_head vfs_fs_list; // filesystem_t
static spinlock_t vfs_fs_list_lock;

// Root of the dentry tree. Set by vfs_mount() when "/" is mounted and reset
// to NULL by vfs_unmount() when the root filesystem is unmounted.
dentry_t *root_dentry = NULL;
36 | |
37 | static long do_pagecache_flush(file_t *file, off_t pgoff, size_t npages) |
38 | { |
39 | pr_dinfo2(vfs, "vfs: flushing page cache for file %pio" , (void *) &file->io); |
40 | |
41 | mutex_acquire(mutex: &file->dentry->inode->cache.lock); |
42 | long ret = 0; |
43 | if (pgoff == 0 && npages == (size_t) -1) |
44 | ret = pagecache_flush_or_drop_all(icache: &file->dentry->inode->cache, drop_page: false); |
45 | else |
46 | ret = pagecache_flush_or_drop(icache: &file->dentry->inode->cache, pgoff, npages, drop_page: false); |
47 | |
48 | mutex_release(mutex: &file->dentry->inode->cache.lock); |
49 | return ret; |
50 | } |
51 | |
52 | static long do_sync_inode(file_t *file) |
53 | { |
54 | const superblock_ops_t *ops = file->dentry->inode->superblock->ops; |
55 | if (ops && ops->sync_inode) |
56 | return ops->sync_inode(file->dentry->inode); |
57 | |
58 | return 0; |
59 | } |
60 | |
61 | // BEGIN: filesystem's io_t operations |
62 | static void vfs_io_ops_close(io_t *io) |
63 | { |
64 | file_t *file = container_of(io, file_t, io); |
65 | if (io->type == IO_FILE && io->flags & IO_WRITABLE) // only flush if the file is writable |
66 | { |
67 | do_pagecache_flush(file, pgoff: 0, npages: (off_t) -1); |
68 | do_sync_inode(file); |
69 | } |
70 | |
71 | dentry_unref(dentry: file->dentry); |
72 | |
73 | if (io->type == IO_FILE) |
74 | { |
75 | const file_ops_t *file_ops = file_get_ops(file); |
76 | if (file_ops) |
77 | { |
78 | if (file_ops->release) |
79 | file_ops->release(file); |
80 | } |
81 | } |
82 | |
83 | delete file; |
84 | } |
85 | |
86 | static void vfs_io_ops_close_dir(io_t *io) |
87 | { |
88 | file_t *file = container_of(io, file_t, io); |
89 | |
90 | if (file->private_data) |
91 | { |
92 | vfs_listdir_state_t *state = static_cast<vfs_listdir_state_t *>(file->private_data); |
93 | list_foreach(vfs_listdir_entry_t, entry, state->entries) |
94 | { |
95 | list_remove(entry); |
96 | delete entry; |
97 | } |
98 | |
99 | delete state; |
100 | file->private_data = NULL; |
101 | } |
102 | |
103 | vfs_io_ops_close(io); // close the file |
104 | } |
105 | |
106 | static size_t vfs_io_ops_read(io_t *io, void *buf, size_t count) |
107 | { |
108 | file_t *file = container_of(io, file_t, io); |
109 | const file_ops_t *const file_ops = file_get_ops(file); |
110 | if (!file_ops || !file_ops->read) |
111 | return 0; |
112 | |
113 | spinlock_acquire(&file->offset_lock); |
114 | size_t ret = file_ops->read(file, buf, count, file->offset); |
115 | if (IS_ERR_VALUE(ret)) |
116 | ; // do nothing |
117 | else if (ret != (size_t) -1) |
118 | file->offset += ret; |
119 | spinlock_release(&file->offset_lock); |
120 | |
121 | return ret; |
122 | } |
123 | |
124 | static size_t vfs_io_ops_write(io_t *io, const void *buf, size_t count) |
125 | { |
126 | file_t *file = container_of(io, file_t, io); |
127 | const file_ops_t *const file_ops = file_get_ops(file); |
128 | if (!file_ops || !file_ops->write) |
129 | return 0; |
130 | |
131 | spinlock_acquire(&file->offset_lock); |
132 | size_t ret = file_ops->write(file, buf, count, file->offset); |
133 | if (!IS_ERR_VALUE(ret)) |
134 | file->offset += ret; |
135 | spinlock_release(&file->offset_lock); |
136 | return ret; |
137 | } |
138 | |
139 | static off_t vfs_io_ops_seek(io_t *io, off_t offset, io_seek_whence_t whence) |
140 | { |
141 | file_t *file = container_of(io, file_t, io); |
142 | |
143 | const file_ops_t *const ops = file_get_ops(file); |
144 | if (ops->seek) |
145 | return ops->seek(file, offset, whence); // use the filesystem's lseek if it exists |
146 | |
147 | spinlock_acquire(&file->offset_lock); |
148 | |
149 | switch (whence) |
150 | { |
151 | case IO_SEEK_SET: |
152 | { |
153 | file->offset = std::max(a: offset, b: 0l); |
154 | break; |
155 | } |
156 | case IO_SEEK_CURRENT: |
157 | { |
158 | off_t new_offset = file->offset + offset; |
159 | new_offset = std::max(a: new_offset, b: 0l); |
160 | file->offset = new_offset; |
161 | break; |
162 | } |
163 | case IO_SEEK_END: |
164 | { |
165 | off_t new_offset = file->dentry->inode->size + offset; |
166 | new_offset = std::max(a: new_offset, b: 0l); |
167 | file->offset = new_offset; |
168 | break; |
169 | } |
170 | case IO_SEEK_DATA: mos_warn("vfs: IO_SEEK_DATA is not supported" ); break; |
171 | case IO_SEEK_HOLE: mos_warn("vfs: IO_SEEK_HOLE is not supported" ); break; |
172 | }; |
173 | |
174 | spinlock_release(&file->offset_lock); |
175 | return file->offset; |
176 | } |
177 | |
178 | static vmfault_result_t vfs_fault_handler(vmap_t *vmap, ptr_t fault_addr, pagefault_t *info) |
179 | { |
180 | MOS_ASSERT(vmap->io); |
181 | file_t *file = container_of(vmap->io, file_t, io); |
182 | const size_t fault_pgoffset = (vmap->io_offset + ALIGN_DOWN_TO_PAGE(fault_addr) - vmap->vaddr) / MOS_PAGE_SIZE; |
183 | |
184 | mutex_acquire(mutex: &file->dentry->inode->cache.lock); // lock the inode cache |
185 | auto pagecache_page = pagecache_get_page_for_read(cache: &file->dentry->inode->cache, pgoff: fault_pgoffset); |
186 | mutex_release(mutex: &file->dentry->inode->cache.lock); |
187 | |
188 | if (pagecache_page.isErr()) |
189 | return VMFAULT_CANNOT_HANDLE; |
190 | |
191 | // ! mm subsystem has verified that this vmap can be written to, but in the page table it's marked as read-only |
192 | // * currently, only CoW pages have this property, we treat this as a CoW page |
193 | if (info->is_present && info->is_write) |
194 | { |
195 | if (pagecache_page == info->faulting_page) |
196 | vmap_stat_dec(vmap, pagecache); // the faulting page is a pagecache page |
197 | else |
198 | vmap_stat_dec(vmap, cow); // the faulting page is a COW page |
199 | vmap_stat_inc(vmap, regular); |
200 | return mm_resolve_cow_fault(vmap, fault_addr, info); // resolve by copying data page into prevate page |
201 | } |
202 | |
203 | info->backing_page = pagecache_page.get(); |
204 | if (vmap->type == VMAP_TYPE_PRIVATE) |
205 | { |
206 | if (info->is_write) |
207 | { |
208 | vmap_stat_inc(vmap, regular); |
209 | // present pages are handled above |
210 | MOS_ASSERT(!info->is_present); |
211 | return VMFAULT_COPY_BACKING_PAGE; // copy and (also) map the backing page |
212 | } |
213 | else |
214 | { |
215 | vmap_stat_inc(vmap, pagecache); |
216 | vmap_stat_inc(vmap, cow); |
217 | return VMFAULT_MAP_BACKING_PAGE_RO; |
218 | } |
219 | } |
220 | else |
221 | { |
222 | vmap_stat_inc(vmap, pagecache); |
223 | vmap_stat_inc(vmap, regular); |
224 | return VMFAULT_MAP_BACKING_PAGE; |
225 | } |
226 | } |
227 | |
228 | static bool vfs_io_ops_mmap(io_t *io, vmap_t *vmap, off_t offset) |
229 | { |
230 | file_t *file = container_of(io, file_t, io); |
231 | const file_ops_t *const file_ops = file_get_ops(file); |
232 | |
233 | MOS_ASSERT(!vmap->on_fault); // there should be no fault handler set |
234 | vmap->on_fault = vfs_fault_handler; |
235 | |
236 | if (file_ops->mmap) |
237 | return file_ops->mmap(file, vmap, offset); |
238 | |
239 | return true; |
240 | } |
241 | |
242 | static bool vfs_io_ops_munmap(io_t *io, vmap_t *vmap, bool *unmapped) |
243 | { |
244 | file_t *file = container_of(io, file_t, io); |
245 | const file_ops_t *const file_ops = file_get_ops(file); |
246 | |
247 | if (file_ops->munmap) |
248 | return file_ops->munmap(file, vmap, unmapped); |
249 | |
250 | return true; |
251 | } |
252 | |
253 | static void vfs_io_ops_getname(const io_t *io, char *buf, size_t size) |
254 | { |
255 | const file_t *file = container_of(io, file_t, io); |
256 | dentry_path(dentry: file->dentry, root: root_dentry, buf, size); |
257 | } |
258 | |
// io_t operation table used for every non-directory file opened through
// vfs_do_open_dentry().
static const io_op_t file_io_ops = {
    .read = vfs_io_ops_read,
    .write = vfs_io_ops_write,
    .close = vfs_io_ops_close,
    .seek = vfs_io_ops_seek,
    .mmap = vfs_io_ops_mmap,
    .munmap = vfs_io_ops_munmap,
    .get_name = vfs_io_ops_getname,
};
268 | |
// io_t operation table used for directories: "read" returns directory entries
// (vfs_list_dir) and close also frees the cached listing. No write, seek or
// mmap hooks are provided.
static const io_op_t dir_io_ops = {
    .read = vfs_list_dir,
    .close = vfs_io_ops_close_dir,
    .get_name = vfs_io_ops_getname,
};
274 | |
275 | // END: filesystem's io_t operations |
276 | |
277 | static __used void vfs_flusher_entry(void *arg) |
278 | { |
279 | MOS_UNUSED(arg); |
280 | while (true) |
281 | { |
282 | timer_msleep(ms: 10 * 1000); |
283 | // pagecache_flush_all(); |
284 | } |
285 | } |
286 | |
// Init hook registered for the KTHREAD stage. The kthread_create() call that
// would start vfs_flusher_entry is commented out, so this is currently a no-op.
static void vfs_flusher_init(void)
{
    // kthread_create(vfs_flusher_entry, NULL, "vfs_flusher");
}
MOS_INIT(KTHREAD, vfs_flusher_init);
292 | |
293 | static void vfs_copy_stat(file_stat_t *statbuf, inode_t *inode) |
294 | { |
295 | statbuf->ino = inode->ino; |
296 | statbuf->type = inode->type; |
297 | statbuf->perm = inode->perm; |
298 | statbuf->size = inode->size; |
299 | statbuf->uid = inode->uid; |
300 | statbuf->gid = inode->gid; |
301 | statbuf->sticky = inode->sticky; |
302 | statbuf->suid = inode->suid; |
303 | statbuf->sgid = inode->sgid; |
304 | statbuf->nlinks = inode->nlinks; |
305 | statbuf->accessed = inode->accessed; |
306 | statbuf->modified = inode->modified; |
307 | statbuf->created = inode->created; |
308 | } |
309 | |
310 | static filesystem_t *vfs_find_filesystem(mos::string_view name) |
311 | { |
312 | SpinLocker lock(&vfs_fs_list_lock); |
313 | list_foreach(filesystem_t, fs, vfs_fs_list) |
314 | { |
315 | if (fs->name == name) |
316 | return fs; |
317 | } |
318 | |
319 | return nullptr; |
320 | } |
321 | |
322 | static bool vfs_verify_permissions(dentry_t &file_dentry, bool open, bool read, bool create, bool execute, bool write) |
323 | { |
324 | MOS_ASSERT(file_dentry.inode); |
325 | const file_perm_t file_perm = file_dentry.inode->perm; |
326 | |
327 | // TODO: we are treating all users as root for now, only checks for execute permission |
328 | MOS_UNUSED(open); |
329 | MOS_UNUSED(read); |
330 | MOS_UNUSED(create); |
331 | MOS_UNUSED(write); |
332 | |
333 | if (execute && !(file_perm & PERM_EXEC)) |
334 | return false; // execute permission denied |
335 | |
336 | return true; |
337 | } |
338 | |
339 | static PtrResult<file_t> vfs_do_open(dentry_t *base, const char *path, open_flags flags) |
340 | { |
341 | if (base == NULL) |
342 | return -EINVAL; |
343 | |
344 | const bool may_create = flags & OPEN_CREATE; |
345 | const bool read = flags & OPEN_READ; |
346 | const bool write = flags & OPEN_WRITE; |
347 | const bool exec = flags & OPEN_EXECUTE; |
348 | const bool no_follow = flags & OPEN_NO_FOLLOW; |
349 | const bool expect_dir = flags & OPEN_DIR; |
350 | const bool truncate = flags & OPEN_TRUNCATE; |
351 | |
352 | lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_FILE; |
353 | if (no_follow) |
354 | resolve_flags |= RESOLVE_SYMLINK_NOFOLLOW; |
355 | if (may_create) |
356 | resolve_flags |= RESOLVE_EXPECT_ANY_EXIST; |
357 | if (expect_dir) |
358 | resolve_flags |= RESOLVE_EXPECT_DIR; |
359 | |
360 | auto entry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: resolve_flags); |
361 | if (entry.isErr()) |
362 | { |
363 | pr_dinfo2(vfs, "failed to resolve '%s': create=%d, r=%d, x=%d, nofollow=%d, dir=%d, truncate=%d" , path, may_create, read, exec, no_follow, expect_dir, truncate); |
364 | return entry.getErr(); |
365 | } |
366 | |
367 | bool created = false; |
368 | |
369 | if (may_create && entry->inode == NULL) |
370 | { |
371 | auto parent = dentry_parent(dentry: *entry); |
372 | if (!parent->inode->ops->newfile) |
373 | { |
374 | dentry_unref(dentry: entry.get()); |
375 | return -EROFS; |
376 | } |
377 | |
378 | if (!parent->inode->ops->newfile(parent->inode, entry.get(), FILE_TYPE_REGULAR, 0666)) |
379 | { |
380 | dentry_unref(dentry: entry.get()); |
381 | return -EIO; // failed to create file |
382 | } |
383 | |
384 | created = true; |
385 | } |
386 | |
387 | if (!vfs_verify_permissions(file_dentry&: *entry, open: true, read, create: may_create, execute: exec, write)) |
388 | { |
389 | dentry_unref(dentry: entry.get()); |
390 | return -EACCES; |
391 | } |
392 | |
393 | auto file = vfs_do_open_dentry(entry: entry.get(), created, read, write, exec, truncate); |
394 | if (file.isErr()) |
395 | { |
396 | dentry_unref(dentry: entry.get()); |
397 | return file.getErr(); |
398 | } |
399 | |
400 | return file; |
401 | } |
402 | |
403 | // public functions |
404 | PtrResult<file_t> vfs_do_open_dentry(dentry_t *entry, bool created, bool read, bool write, bool exec, bool truncate) |
405 | { |
406 | MOS_ASSERT(entry->inode); |
407 | MOS_UNUSED(truncate); |
408 | |
409 | file_t *file = mos::create<file_t>(); |
410 | file->dentry = entry; |
411 | |
412 | io_flags_t io_flags = IO_SEEKABLE; |
413 | |
414 | if (read) |
415 | io_flags |= IO_READABLE; |
416 | |
417 | if (write) |
418 | io_flags |= IO_WRITABLE; |
419 | |
420 | if (exec) |
421 | io_flags |= IO_EXECUTABLE; |
422 | |
423 | // only regular files are mmapable |
424 | if (entry->inode->type == FILE_TYPE_REGULAR) |
425 | io_flags |= IO_MMAPABLE; |
426 | |
427 | if (file->dentry->inode->type == FILE_TYPE_DIRECTORY) |
428 | io_init(io: &file->io, type: IO_DIR, flags: (io_flags | IO_READABLE) & ~IO_SEEKABLE, ops: &dir_io_ops); |
429 | else |
430 | io_init(io: &file->io, type: IO_FILE, flags: io_flags, ops: &file_io_ops); |
431 | |
432 | const file_ops_t *ops = file_get_ops(file); |
433 | if (ops && ops->open) |
434 | { |
435 | bool opened = ops->open(file->dentry->inode, file, created); |
436 | if (!opened) |
437 | { |
438 | delete file; |
439 | return -ENOTSUP; |
440 | } |
441 | } |
442 | |
443 | return file; |
444 | } |
445 | |
446 | void vfs_register_filesystem(filesystem_t *fs) |
447 | { |
448 | if (vfs_find_filesystem(name: fs->name)) |
449 | mos_panic("filesystem '%s' already registered" , fs->name.c_str()); |
450 | |
451 | MOS_ASSERT(list_is_empty(list_node(fs))); |
452 | |
453 | spinlock_acquire(&vfs_fs_list_lock); |
454 | list_node_append(head: &vfs_fs_list, list_node(fs)); |
455 | spinlock_release(&vfs_fs_list_lock); |
456 | |
457 | pr_dinfo2(vfs, "filesystem '%s' registered" , fs->name.c_str()); |
458 | } |
459 | |
460 | long vfs_mount(const char *device, const char *path, const char *fs, const char *options) |
461 | { |
462 | filesystem_t *real_fs = vfs_find_filesystem(name: fs); |
463 | if (unlikely(real_fs == NULL)) |
464 | { |
465 | mos_warn("filesystem '%s' not found" , fs); |
466 | return -EINVAL; |
467 | } |
468 | |
469 | MOS_ASSERT_X(real_fs->mount, "filesystem '%s' does not support mounting" , real_fs->name.c_str()); |
470 | |
471 | if (unlikely(strcmp(path, "/" ) == 0)) |
472 | { |
473 | // special case: mount root filesystem |
474 | if (root_dentry) |
475 | { |
476 | pr_warn("root filesystem is already mounted" ); |
477 | return -EBUSY; |
478 | } |
479 | pr_dinfo2(vfs, "mounting root filesystem '%s'..." , fs); |
480 | const auto mountResult = real_fs->mount(real_fs, device, options); |
481 | if (mountResult.isErr()) |
482 | { |
483 | mos_warn("failed to mount root filesystem" ); |
484 | return -EIO; |
485 | } |
486 | else |
487 | { |
488 | root_dentry = mountResult.get(); |
489 | } |
490 | |
491 | pr_dinfo2(vfs, "root filesystem mounted, dentry=%p" , (void *) root_dentry); |
492 | |
493 | MOS_ASSERT(root_dentry->name.empty()); |
494 | bool mounted = dentry_mount(mountpoint: root_dentry, root: root_dentry, fs: real_fs); |
495 | MOS_ASSERT(mounted); |
496 | |
497 | return 0; |
498 | } |
499 | |
500 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
501 | if (base.isErr()) |
502 | return base.getErr(); |
503 | |
504 | auto mpRoot = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR | RESOLVE_EXPECT_EXIST); |
505 | if (mpRoot.isErr()) |
506 | return mpRoot.getErr(); |
507 | |
508 | if (mpRoot->is_mountpoint) |
509 | { |
510 | // we don't support overlaying filesystems yet |
511 | mos_warn("mount point is already mounted" ); |
512 | dentry_unref(dentry: mpRoot.get()); |
513 | return -ENOTSUP; |
514 | } |
515 | |
516 | // when mounting: |
517 | // mounted_root will have a reference of 1 |
518 | // the mount_point will have its reference incremented by 1 |
519 | auto mounted_root = real_fs->mount(real_fs, device, options); |
520 | if (mounted_root.isErr()) |
521 | { |
522 | mos_warn("failed to mount filesystem" ); |
523 | return mounted_root.getErr(); |
524 | } |
525 | |
526 | const bool mounted = dentry_mount(mountpoint: mpRoot.get(), root: mounted_root.get(), fs: real_fs); |
527 | if (unlikely(!mounted)) |
528 | { |
529 | mos_warn("failed to mount filesystem" ); |
530 | return -EIO; |
531 | } |
532 | |
533 | MOS_ASSERT_X(mpRoot->refcount == mounted_root->refcount, "mountpoint refcount=%zu, mounted_root refcount=%zu" , mpRoot->refcount.load(), |
534 | mounted_root->refcount.load()); |
535 | pr_dinfo2(vfs, "mounted filesystem '%s' on '%s'" , fs, path); |
536 | return 0; |
537 | } |
538 | |
539 | long vfs_unmount(const char *path) |
540 | { |
541 | auto mounted_root = dentry_resolve(starting_dir: root_dentry, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR | RESOLVE_EXPECT_EXIST); |
542 | if (mounted_root.isErr()) |
543 | return mounted_root.getErr(); |
544 | |
545 | // the mounted root itself holds a ref, and the caller of this function |
546 | if (mounted_root->refcount != 2) |
547 | { |
548 | dentry_check_refstat(dentry: mounted_root.get()); |
549 | mos_warn("refcount is not as expected" ); |
550 | return -EBUSY; |
551 | } |
552 | |
553 | dentry_unref(dentry: mounted_root.get()); // release the reference held by this function |
554 | |
555 | // unmounting root filesystem |
556 | auto mountpoint = dentry_unmount(root: mounted_root.get()); |
557 | if (!mountpoint) |
558 | { |
559 | mos_warn("failed to unmount filesystem" ); |
560 | return -EIO; |
561 | } |
562 | |
563 | MOS_ASSERT(mounted_root->refcount == mountpoint->refcount && mountpoint->refcount == 1); |
564 | if (mounted_root->superblock->fs->unmount) |
565 | mounted_root->superblock->fs->unmount(mounted_root->superblock->fs, mounted_root.get()); |
566 | else |
567 | MOS_ASSERT(dentry_unref_one_norelease(mounted_root.get())); |
568 | MOS_ASSERT_X(mounted_root->refcount == 0, "fs->umount should release the last reference to the mounted root" ); |
569 | |
570 | if (mounted_root == root_dentry) |
571 | { |
572 | pr_info2("unmounted root filesystem" ); |
573 | root_dentry = NULL; |
574 | return 0; |
575 | } |
576 | |
577 | dentry_unref(dentry: mountpoint); |
578 | return 0; |
579 | } |
580 | |
581 | PtrResult<file_t> vfs_openat(int fd, const char *path, open_flags flags) |
582 | { |
583 | pr_dinfo2(vfs, "vfs_openat(fd=%d, path='%s', flags=%x)" , fd, path, flags); |
584 | auto basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
585 | if (basedir.isErr()) |
586 | return basedir.getErr(); |
587 | |
588 | auto file = vfs_do_open(base: basedir.get(), path, flags); |
589 | return file; |
590 | } |
591 | |
592 | long vfs_fstatat(fd_t fd, const char *path, file_stat_t *__restrict statbuf, fstatat_flags flags) |
593 | { |
594 | if (flags & FSTATAT_FILE) |
595 | { |
596 | pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%p', stat=%p, flags=%x)" , fd, (void *) path, (void *) statbuf, flags); |
597 | io_t *io = process_get_fd(current_process, fd); |
598 | if (!(io_valid(io) && (io->type == IO_FILE || io->type == IO_DIR))) |
599 | return -EBADF; // io is closed, or is not a file or directory |
600 | |
601 | file_t *file = container_of(io, file_t, io); |
602 | MOS_ASSERT(file); |
603 | if (statbuf) |
604 | vfs_copy_stat(statbuf, inode: file->dentry->inode); |
605 | |
606 | return 0; |
607 | } |
608 | |
609 | pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%s', stat=%p, flags=%x)" , fd, path, (void *) statbuf, flags); |
610 | auto basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
611 | if (basedir.isErr()) |
612 | return basedir.getErr(); |
613 | |
614 | lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_ANY_TYPE | RESOLVE_EXPECT_EXIST; |
615 | if (flags & FSTATAT_NOFOLLOW) |
616 | resolve_flags |= RESOLVE_SYMLINK_NOFOLLOW; |
617 | |
618 | auto dentry = dentry_resolve(starting_dir: basedir.get(), root_dir: root_dentry, path, flags: resolve_flags); |
619 | if (dentry.isErr()) |
620 | return dentry.getErr(); |
621 | |
622 | if (statbuf) |
623 | vfs_copy_stat(statbuf, inode: dentry->inode); |
624 | dentry_unref(dentry: dentry.get()); |
625 | return 0; |
626 | } |
627 | |
628 | size_t vfs_readlinkat(fd_t dirfd, const char *path, char *buf, size_t size) |
629 | { |
630 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
631 | if (base.isErr()) |
632 | return base.getErr(); |
633 | |
634 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_SYMLINK_NOFOLLOW | RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_FILE); |
635 | if (dentry.isErr()) |
636 | return dentry.getErr(); |
637 | |
638 | if (dentry->inode->type != FILE_TYPE_SYMLINK) |
639 | { |
640 | dentry_unref(dentry: dentry.get()); |
641 | return -EINVAL; |
642 | } |
643 | |
644 | const size_t len = dentry->inode->ops->readlink(dentry.get(), buf, size); |
645 | |
646 | dentry_unref(dentry: dentry.get()); |
647 | |
648 | if (len >= size) // buffer too small |
649 | return -ENAMETOOLONG; |
650 | |
651 | return len; |
652 | } |
653 | |
654 | long vfs_symlink(const char *path, const char *target) |
655 | { |
656 | pr_dinfo2(vfs, "vfs_symlink(path='%s', target='%s')" , path, target); |
657 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
658 | if (base.isErr()) |
659 | return base.getErr(); |
660 | |
661 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST); |
662 | if (dentry.isErr()) |
663 | return dentry.getErr(); |
664 | |
665 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
666 | const bool created = parent_dir->inode->ops->symlink(parent_dir->inode, dentry.get(), target); |
667 | |
668 | if (!created) |
669 | mos_warn("failed to create symlink '%s'" , path); |
670 | |
671 | dentry_unref(dentry: dentry.get()); |
672 | return created ? 0 : -EIO; |
673 | } |
674 | |
675 | long vfs_mkdir(const char *path) |
676 | { |
677 | pr_dinfo2(vfs, "vfs_mkdir('%s')" , path); |
678 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
679 | if (base.isErr()) |
680 | return base.getErr(); |
681 | |
682 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST); |
683 | if (dentry.isErr()) |
684 | return dentry.getErr(); |
685 | |
686 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
687 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->mkdir == NULL) |
688 | { |
689 | dentry_unref(dentry: dentry.get()); |
690 | return false; |
691 | } |
692 | |
693 | // TODO: use umask or something else |
694 | const bool created = parent_dir->inode->ops->mkdir(parent_dir->inode, dentry.get(), parent_dir->inode->perm); |
695 | |
696 | if (!created) |
697 | mos_warn("failed to create directory '%s'" , path); |
698 | |
699 | dentry_unref(dentry: dentry.get()); |
700 | return created ? 0 : -EIO; |
701 | } |
702 | |
703 | long vfs_rmdir(const char *path) |
704 | { |
705 | pr_dinfo2(vfs, "vfs_rmdir('%s')" , path); |
706 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
707 | if (base.isErr()) |
708 | return base.getErr(); |
709 | |
710 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_DIR); |
711 | if (dentry.isErr()) |
712 | return dentry.getErr(); |
713 | |
714 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
715 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->rmdir == NULL) |
716 | { |
717 | dentry_unref(dentry: dentry.get()); |
718 | return -ENOTSUP; |
719 | } |
720 | |
721 | const bool removed = parent_dir->inode->ops->rmdir(parent_dir->inode, dentry.get()); |
722 | |
723 | if (!removed) |
724 | mos_warn("failed to remove directory '%s'" , path); |
725 | |
726 | dentry_unref(dentry: dentry.get()); |
727 | return removed ? 0 : -EIO; |
728 | } |
729 | |
730 | size_t vfs_list_dir(io_t *io, void *user_buf, size_t user_size) |
731 | { |
732 | pr_dinfo2(vfs, "vfs_list_dir(io=%p, buf=%p, size=%zu)" , (void *) io, (void *) user_buf, user_size); |
733 | file_t *file = container_of(io, file_t, io); |
734 | if (unlikely(file->dentry->inode->type != FILE_TYPE_DIRECTORY)) |
735 | { |
736 | mos_warn("not a directory" ); |
737 | return 0; |
738 | } |
739 | |
740 | if (file->private_data == NULL) |
741 | { |
742 | vfs_listdir_state_t *const state = mos::create<vfs_listdir_state_t>(); |
743 | file->private_data = state; |
744 | linked_list_init(head_node: &state->entries); |
745 | state->n_count = state->read_offset = 0; |
746 | vfs_populate_listdir_buf(dir: file->dentry, state); |
747 | } |
748 | |
749 | vfs_listdir_state_t *const state = (vfs_listdir_state_t *) file->private_data; |
750 | |
751 | if (state->read_offset >= state->n_count) |
752 | return 0; // no more entries |
753 | |
754 | size_t bytes_copied = 0; |
755 | size_t i = 0; |
756 | list_foreach(vfs_listdir_entry_t, entry, state->entries) |
757 | { |
758 | if (i++ < state->read_offset) |
759 | continue; // skip the entries we have already read |
760 | |
761 | if (state->read_offset >= state->n_count) |
762 | break; |
763 | |
764 | const size_t entry_size = sizeof(ino_t) + sizeof(off_t) + sizeof(short) + sizeof(char) + entry->name.size() + 1; // +1 for the null terminator |
765 | if (bytes_copied + entry_size > user_size) |
766 | break; |
767 | |
768 | struct dirent *dirent = (struct dirent *) (((char *) user_buf) + bytes_copied); |
769 | dirent->d_ino = entry->ino; |
770 | dirent->d_type = entry->type; |
771 | dirent->d_reclen = entry_size; |
772 | dirent->d_off = entry_size - 1; |
773 | memcpy(dest: dirent->d_name, src: entry->name.data(), n: entry->name.size()); |
774 | dirent->d_name[entry->name.size()] = '\0'; |
775 | bytes_copied += entry_size; |
776 | state->read_offset++; |
777 | } |
778 | |
779 | return bytes_copied; |
780 | } |
781 | |
782 | long vfs_chdirat(fd_t dirfd, const char *path) |
783 | { |
784 | pr_dinfo2(vfs, "vfs_chdir('%s')" , path); |
785 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
786 | if (base.isErr()) |
787 | return base.getErr(); |
788 | |
789 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_DIR); |
790 | if (dentry.isErr()) |
791 | return dentry.getErr(); |
792 | |
793 | auto old_cwd = dentry_from_fd(AT_FDCWD); |
794 | if (old_cwd) |
795 | dentry_unref(dentry: old_cwd.get()); |
796 | |
797 | current_process->working_directory = dentry.get(); |
798 | return 0; |
799 | } |
800 | |
801 | ssize_t vfs_getcwd(char *buf, size_t size) |
802 | { |
803 | auto cwd = dentry_from_fd(AT_FDCWD); |
804 | if (cwd.isErr()) |
805 | return cwd.getErr(); |
806 | |
807 | return dentry_path(dentry: cwd.get(), root: root_dentry, buf, size); |
808 | } |
809 | |
810 | long vfs_fchmodat(fd_t fd, const char *path, int perm, int flags) |
811 | { |
812 | pr_dinfo2(vfs, "vfs_fchmodat(fd=%d, path='%s', perm=%o, flags=%x)" , fd, path, perm, flags); |
813 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
814 | if (base.isErr()) |
815 | return base.getErr(); |
816 | |
817 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_ANY_TYPE); |
818 | if (dentry.isErr()) |
819 | return dentry.getErr(); |
820 | |
821 | // TODO: check if the underlying filesystem supports chmod, and is not read-only |
822 | dentry->inode->perm = perm; |
823 | dentry_unref(dentry: dentry.get()); |
824 | return 0; |
825 | } |
826 | |
827 | long vfs_unlinkat(fd_t dirfd, const char *path) |
828 | { |
829 | pr_dinfo2(vfs, "vfs_unlinkat(dirfd=%d, path='%s')" , dirfd, path); |
830 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
831 | if (base.isErr()) |
832 | return base.getErr(); |
833 | |
834 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_FILE | RESOLVE_SYMLINK_NOFOLLOW); |
835 | if (dentry.isErr()) |
836 | return dentry.getErr(); |
837 | |
838 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
839 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->unlink == NULL) |
840 | { |
841 | dentry_unref(dentry: dentry.get()); |
842 | return -ENOTSUP; |
843 | } |
844 | |
845 | if (!inode_unlink(dir: parent_dir->inode, dentry: dentry.get())) |
846 | { |
847 | dentry_unref(dentry: dentry.get()); |
848 | return -EIO; |
849 | } |
850 | |
851 | dentry_unref(dentry: dentry.get()); // it won't release dentry because dentry->inode is still valid |
852 | dentry_detach(dentry: dentry.get()); |
853 | dentry_try_release(dentry: dentry.get()); |
854 | return 0; |
855 | } |
856 | |
857 | long vfs_fsync(io_t *io, bool sync_metadata, off_t start, off_t end) |
858 | { |
859 | pr_dinfo2(vfs, "vfs_fsync(io=%p, sync_metadata=%d, start=%ld, end=%ld)" , (void *) io, sync_metadata, start, end); |
860 | file_t *file = container_of(io, file_t, io); |
861 | |
862 | const off_t nbytes = end - start; |
863 | const off_t npages = ALIGN_UP_TO_PAGE(nbytes) / MOS_PAGE_SIZE; |
864 | const off_t pgoffset = start / MOS_PAGE_SIZE; |
865 | |
866 | long ret = do_pagecache_flush(file, pgoff: pgoffset, npages); |
867 | if (ret < 0) |
868 | return ret; |
869 | |
870 | if (sync_metadata) |
871 | { |
872 | ret = do_sync_inode(file); |
873 | if (ret < 0) |
874 | return ret; |
875 | } |
876 | |
877 | return ret; |
878 | } |
879 | |
880 | // ! sysfs support |
881 | |
882 | static bool vfs_sysfs_filesystems(sysfs_file_t *f) |
883 | { |
884 | list_foreach(filesystem_t, fs, vfs_fs_list) |
885 | { |
886 | sysfs_printf(file: f, fmt: "%s\n" , fs->name.c_str()); |
887 | } |
888 | |
889 | return true; |
890 | } |
891 | |
892 | static bool vfs_sysfs_mountpoints(sysfs_file_t *f) |
893 | { |
894 | char pathbuf[MOS_PATH_MAX_LENGTH]; |
895 | list_foreach(mount_t, mp, vfs_mountpoint_list) |
896 | { |
897 | dentry_path(dentry: mp->mountpoint, root: root_dentry, buf: pathbuf, size: sizeof(pathbuf)); |
898 | sysfs_printf(file: f, fmt: "%-20s %-10s\n" , pathbuf, mp->fs->name.c_str()); |
899 | } |
900 | |
901 | return true; |
902 | } |
903 | |
904 | static void vfs_sysfs_dentry_stats_stat_receiver(int depth, const dentry_t *dentry, bool mountroot, void *data) |
905 | { |
906 | sysfs_file_t *file = (sysfs_file_t *) data; |
907 | sysfs_printf(file, fmt: "%*s%s: refcount=%zu%s\n" , // |
908 | depth * 4, // |
909 | "" , // |
910 | dentry_name(dentry).c_str(), // |
911 | dentry->refcount.load(), // |
912 | mountroot ? " (mount root)" : (dentry->is_mountpoint ? " (mountpoint)" : "" ) // |
913 | ); |
914 | } |
915 | |
916 | static bool vfs_sysfs_dentry_stats(sysfs_file_t *f) |
917 | { |
918 | dentry_dump_refstat(dentry: root_dentry, receiver: vfs_sysfs_dentry_stats_stat_receiver, data: f); |
919 | return true; |
920 | } |
921 | |
// Read-only sysfs items exported by the VFS, registered under the name "vfs":
//   filesystems   - names of the registered filesystems
//   mount         - mountpoints and the filesystem mounted on each
//   dentry_stats  - reference-count dump of the dentry tree
static sysfs_item_t vfs_sysfs_items[] = {
    SYSFS_RO_ITEM("filesystems", vfs_sysfs_filesystems),
    SYSFS_RO_ITEM("mount", vfs_sysfs_mountpoints),
    SYSFS_RO_ITEM("dentry_stats", vfs_sysfs_dentry_stats),
};

SYSFS_AUTOREGISTER(vfs, vfs_sysfs_items);
929 | |