1 | // SPDX-License-Identifier: GPL-3.0-or-later |
2 | |
3 | #include "mos/assert.h" |
4 | #include "mos/device/timer.h" |
5 | #include "mos/filesystem/inode.h" |
6 | #include "mos/filesystem/mount.h" |
7 | #include "mos/filesystem/page_cache.h" |
8 | #include "mos/filesystem/sysfs/sysfs.h" |
9 | #include "mos/filesystem/sysfs/sysfs_autoinit.h" |
10 | #include "mos/mm/mm.h" |
11 | #include "mos/mm/mmstat.h" |
12 | #include "mos/mm/physical/pmm.h" |
13 | #include "mos/mm/slab_autoinit.h" |
14 | |
15 | #include <dirent.h> |
16 | #include <errno.h> |
17 | #include <mos/filesystem/dentry.h> |
18 | #include <mos/filesystem/fs_types.h> |
19 | #include <mos/filesystem/vfs.h> |
20 | #include <mos/filesystem/vfs_types.h> |
21 | #include <mos/io/io.h> |
22 | #include <mos/lib/structures/list.h> |
23 | #include <mos/lib/structures/tree.h> |
24 | #include <mos/lib/sync/spinlock.h> |
25 | #include <mos/mos_global.h> |
26 | #include <mos/platform/platform.h> |
27 | #include <mos/syslog/printk.h> |
28 | #include <mos/tasks/process.h> |
29 | #include <mos/types.h> |
30 | #include <mos_stdlib.h> |
31 | #include <mos_string.h> |
32 | |
33 | static list_head vfs_fs_list = LIST_HEAD_INIT(vfs_fs_list); // filesystem_t |
34 | static spinlock_t vfs_fs_list_lock = SPINLOCK_INIT; |
35 | |
36 | dentry_t *root_dentry = NULL; |
37 | |
38 | slab_t *superblock_cache = NULL, *mount_cache = NULL, *file_cache = NULL; |
39 | |
40 | SLAB_AUTOINIT("superblock" , superblock_cache, superblock_t); |
41 | SLAB_AUTOINIT("mount" , mount_cache, mount_t); |
42 | SLAB_AUTOINIT("file" , file_cache, file_t); |
43 | |
44 | static long do_pagecache_flush(file_t *file, off_t pgoff, size_t npages) |
45 | { |
46 | pr_dinfo2(vfs, "vfs: flushing page cache for file %pio" , (void *) &file->io); |
47 | |
48 | mutex_acquire(mutex: &file->dentry->inode->cache.lock); |
49 | long ret = 0; |
50 | if (pgoff == 0 && npages == (size_t) -1) |
51 | ret = pagecache_flush_or_drop_all(icache: &file->dentry->inode->cache, drop_page: false); |
52 | else |
53 | ret = pagecache_flush_or_drop(icache: &file->dentry->inode->cache, pgoff, npages, drop_page: false); |
54 | |
55 | mutex_release(mutex: &file->dentry->inode->cache.lock); |
56 | return ret; |
57 | } |
58 | |
59 | static long do_sync_inode(file_t *file) |
60 | { |
61 | const superblock_ops_t *ops = file->dentry->inode->superblock->ops; |
62 | if (ops && ops->sync_inode) |
63 | return ops->sync_inode(file->dentry->inode); |
64 | |
65 | return 0; |
66 | } |
67 | |
68 | // BEGIN: filesystem's io_t operations |
69 | static void vfs_io_ops_close(io_t *io) |
70 | { |
71 | file_t *file = container_of(io, file_t, io); |
72 | if (io->type == IO_FILE && io->flags & IO_WRITABLE) // only flush if the file is writable |
73 | { |
74 | do_pagecache_flush(file, pgoff: 0, npages: (off_t) -1); |
75 | do_sync_inode(file); |
76 | } |
77 | |
78 | dentry_unref(dentry: file->dentry); |
79 | |
80 | if (io->type == IO_FILE) |
81 | { |
82 | const file_ops_t *file_ops = file_get_ops(file); |
83 | if (file_ops) |
84 | { |
85 | if (file_ops->release) |
86 | file_ops->release(file); |
87 | } |
88 | } |
89 | |
90 | kfree(ptr: file); |
91 | } |
92 | |
93 | static void vfs_io_ops_close_dir(io_t *io) |
94 | { |
95 | file_t *file = container_of(io, file_t, io); |
96 | |
97 | if (file->private_data) |
98 | { |
99 | vfs_listdir_state_t *state = file->private_data; |
100 | list_foreach(vfs_listdir_entry_t, entry, state->entries) |
101 | { |
102 | list_remove(entry); |
103 | kfree(ptr: entry->name); |
104 | kfree(ptr: entry); |
105 | } |
106 | |
107 | kfree(ptr: state); |
108 | file->private_data = NULL; |
109 | } |
110 | |
111 | vfs_io_ops_close(io); // close the file |
112 | } |
113 | |
114 | static size_t vfs_io_ops_read(io_t *io, void *buf, size_t count) |
115 | { |
116 | file_t *file = container_of(io, file_t, io); |
117 | const file_ops_t *const file_ops = file_get_ops(file); |
118 | if (!file_ops || !file_ops->read) |
119 | return 0; |
120 | |
121 | spinlock_acquire(&file->offset_lock); |
122 | size_t ret = file_ops->read(file, buf, count, file->offset); |
123 | if (IS_ERR_VALUE(ret)) |
124 | ; // do nothing |
125 | else if (ret != (size_t) -1) |
126 | file->offset += ret; |
127 | spinlock_release(&file->offset_lock); |
128 | |
129 | return ret; |
130 | } |
131 | |
132 | static size_t vfs_io_ops_write(io_t *io, const void *buf, size_t count) |
133 | { |
134 | file_t *file = container_of(io, file_t, io); |
135 | const file_ops_t *const file_ops = file_get_ops(file); |
136 | if (!file_ops || !file_ops->write) |
137 | return 0; |
138 | |
139 | spinlock_acquire(&file->offset_lock); |
140 | size_t ret = file_ops->write(file, buf, count, file->offset); |
141 | if (!IS_ERR_VALUE(ret)) |
142 | file->offset += ret; |
143 | spinlock_release(&file->offset_lock); |
144 | return ret; |
145 | } |
146 | |
147 | static off_t vfs_io_ops_seek(io_t *io, off_t offset, io_seek_whence_t whence) |
148 | { |
149 | file_t *file = container_of(io, file_t, io); |
150 | |
151 | const file_ops_t *const ops = file_get_ops(file); |
152 | if (ops->seek) |
153 | return ops->seek(file, offset, whence); // use the filesystem's lseek if it exists |
154 | |
155 | spinlock_acquire(&file->offset_lock); |
156 | |
157 | switch (whence) |
158 | { |
159 | case IO_SEEK_SET: |
160 | { |
161 | file->offset = MAX(offset, 0); |
162 | break; |
163 | } |
164 | case IO_SEEK_CURRENT: |
165 | { |
166 | off_t new_offset = file->offset + offset; |
167 | new_offset = MAX(new_offset, 0); |
168 | file->offset = new_offset; |
169 | break; |
170 | } |
171 | case IO_SEEK_END: |
172 | { |
173 | off_t new_offset = file->dentry->inode->size + offset; |
174 | new_offset = MAX(new_offset, 0); |
175 | file->offset = new_offset; |
176 | break; |
177 | } |
178 | case IO_SEEK_DATA: mos_warn("vfs: IO_SEEK_DATA is not supported" ); break; |
179 | case IO_SEEK_HOLE: mos_warn("vfs: IO_SEEK_HOLE is not supported" ); break; |
180 | }; |
181 | |
182 | spinlock_release(&file->offset_lock); |
183 | return file->offset; |
184 | } |
185 | |
186 | static vmfault_result_t vfs_fault_handler(vmap_t *vmap, ptr_t fault_addr, pagefault_t *info) |
187 | { |
188 | MOS_ASSERT(vmap->io); |
189 | file_t *file = container_of(vmap->io, file_t, io); |
190 | const size_t fault_pgoffset = (vmap->io_offset + ALIGN_DOWN_TO_PAGE(fault_addr) - vmap->vaddr) / MOS_PAGE_SIZE; |
191 | |
192 | mutex_acquire(mutex: &file->dentry->inode->cache.lock); // lock the inode cache |
193 | phyframe_t *const pagecache_page = pagecache_get_page_for_read(cache: &file->dentry->inode->cache, pgoff: fault_pgoffset); |
194 | mutex_release(mutex: &file->dentry->inode->cache.lock); |
195 | |
196 | if (IS_ERR(ptr: pagecache_page)) |
197 | return VMFAULT_CANNOT_HANDLE; |
198 | |
199 | // ! mm subsystem has verified that this vmap can be written to, but in the page table it's marked as read-only |
200 | // * currently, only CoW pages have this property, we treat this as a CoW page |
201 | if (info->is_present && info->is_write) |
202 | { |
203 | if (pagecache_page == info->faulting_page) |
204 | vmap_stat_dec(vmap, pagecache); // the faulting page is a pagecache page |
205 | else |
206 | vmap_stat_dec(vmap, cow); // the faulting page is a COW page |
207 | vmap_stat_inc(vmap, regular); |
208 | return mm_resolve_cow_fault(vmap, fault_addr, info); // resolve by copying data page into prevate page |
209 | } |
210 | |
211 | info->backing_page = pagecache_page; |
212 | if (vmap->type == VMAP_TYPE_PRIVATE) |
213 | { |
214 | if (info->is_write) |
215 | { |
216 | vmap_stat_inc(vmap, regular); |
217 | // present pages are handled above |
218 | MOS_ASSERT(!info->is_present); |
219 | return VMFAULT_COPY_BACKING_PAGE; // copy and (also) map the backing page |
220 | } |
221 | else |
222 | { |
223 | vmap_stat_inc(vmap, pagecache); |
224 | vmap_stat_inc(vmap, cow); |
225 | return VMFAULT_MAP_BACKING_PAGE_RO; |
226 | } |
227 | } |
228 | else |
229 | { |
230 | vmap_stat_inc(vmap, pagecache); |
231 | vmap_stat_inc(vmap, regular); |
232 | return VMFAULT_MAP_BACKING_PAGE; |
233 | } |
234 | } |
235 | |
236 | static bool vfs_io_ops_mmap(io_t *io, vmap_t *vmap, off_t offset) |
237 | { |
238 | file_t *file = container_of(io, file_t, io); |
239 | const file_ops_t *const file_ops = file_get_ops(file); |
240 | |
241 | MOS_ASSERT(!vmap->on_fault); // there should be no fault handler set |
242 | vmap->on_fault = vfs_fault_handler; |
243 | |
244 | if (file_ops->mmap) |
245 | return file_ops->mmap(file, vmap, offset); |
246 | |
247 | return true; |
248 | } |
249 | |
250 | static bool vfs_io_ops_munmap(io_t *io, vmap_t *vmap, bool *unmapped) |
251 | { |
252 | file_t *file = container_of(io, file_t, io); |
253 | const file_ops_t *const file_ops = file_get_ops(file); |
254 | |
255 | if (file_ops->munmap) |
256 | return file_ops->munmap(file, vmap, unmapped); |
257 | |
258 | return true; |
259 | } |
260 | |
261 | static void vfs_io_ops_getname(const io_t *io, char *buf, size_t size) |
262 | { |
263 | const file_t *file = container_of(io, file_t, io); |
264 | dentry_path(dentry: file->dentry, root: root_dentry, buf, size); |
265 | } |
266 | |
267 | static const io_op_t file_io_ops = { |
268 | .read = vfs_io_ops_read, |
269 | .write = vfs_io_ops_write, |
270 | .close = vfs_io_ops_close, |
271 | .seek = vfs_io_ops_seek, |
272 | .mmap = vfs_io_ops_mmap, |
273 | .munmap = vfs_io_ops_munmap, |
274 | .get_name = vfs_io_ops_getname, |
275 | }; |
276 | |
277 | static const io_op_t dir_io_ops = { |
278 | .read = vfs_list_dir, |
279 | .close = vfs_io_ops_close_dir, |
280 | .get_name = vfs_io_ops_getname, |
281 | }; |
282 | |
283 | // END: filesystem's io_t operations |
284 | |
/**
 * Entry point intended for a background flusher thread.
 *
 * Currently it only sleeps in 10-second intervals forever; the actual flush
 * call is still disabled (see the commented-out line below).
 */
static void vfs_flusher_entry(void *arg)
{
    MOS_UNUSED(arg);

    for (;;)
    {
        timer_msleep(10 * 1000); // 10 seconds between rounds
        // pagecache_flush_all();
    }
}
294 | |
// Init hook for the background flusher. It is currently a no-op: the
// kthread_create() call that would start vfs_flusher_entry is commented out.
static void vfs_flusher_init(void)
{
    // kthread_create(vfs_flusher_entry, NULL, "vfs_flusher");
}
// Register the hook with MOS_INIT under the KTHREAD tag.
MOS_INIT(KTHREAD, vfs_flusher_init);
300 | |
301 | static void vfs_copy_stat(file_stat_t *statbuf, inode_t *inode) |
302 | { |
303 | statbuf->ino = inode->ino; |
304 | statbuf->type = inode->type; |
305 | statbuf->perm = inode->perm; |
306 | statbuf->size = inode->size; |
307 | statbuf->uid = inode->uid; |
308 | statbuf->gid = inode->gid; |
309 | statbuf->sticky = inode->sticky; |
310 | statbuf->suid = inode->suid; |
311 | statbuf->sgid = inode->sgid; |
312 | statbuf->nlinks = inode->nlinks; |
313 | statbuf->accessed = inode->accessed; |
314 | statbuf->modified = inode->modified; |
315 | statbuf->created = inode->created; |
316 | } |
317 | |
318 | static filesystem_t *vfs_find_filesystem(const char *name) |
319 | { |
320 | filesystem_t *fs_found = NULL; |
321 | spinlock_acquire(&vfs_fs_list_lock); |
322 | list_foreach(filesystem_t, fs, vfs_fs_list) |
323 | { |
324 | if (strcmp(str1: fs->name, str2: name) == 0) |
325 | { |
326 | fs_found = fs; |
327 | break; |
328 | } |
329 | } |
330 | spinlock_release(&vfs_fs_list_lock); |
331 | return fs_found; |
332 | } |
333 | |
334 | static bool vfs_verify_permissions(dentry_t *file_dentry, bool open, bool read, bool create, bool execute, bool write) |
335 | { |
336 | MOS_ASSERT(file_dentry && file_dentry->inode); |
337 | const file_perm_t file_perm = file_dentry->inode->perm; |
338 | |
339 | // TODO: we are treating all users as root for now, only checks for execute permission |
340 | MOS_UNUSED(open); |
341 | MOS_UNUSED(read); |
342 | MOS_UNUSED(create); |
343 | MOS_UNUSED(write); |
344 | |
345 | if (execute && !(file_perm & PERM_EXEC)) |
346 | return false; // execute permission denied |
347 | |
348 | return true; |
349 | } |
350 | |
351 | static file_t *vfs_do_open(dentry_t *base, const char *path, open_flags flags) |
352 | { |
353 | if (base == NULL) |
354 | return NULL; |
355 | |
356 | const bool may_create = flags & OPEN_CREATE; |
357 | const bool read = flags & OPEN_READ; |
358 | const bool write = flags & OPEN_WRITE; |
359 | const bool exec = flags & OPEN_EXECUTE; |
360 | const bool no_follow = flags & OPEN_NO_FOLLOW; |
361 | const bool expect_dir = flags & OPEN_DIR; |
362 | const bool truncate = flags & OPEN_TRUNCATE; |
363 | |
364 | lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_FILE | // |
365 | (no_follow ? RESOLVE_SYMLINK_NOFOLLOW : 0) | // |
366 | (may_create ? RESOLVE_EXPECT_ANY_EXIST : RESOLVE_EXPECT_EXIST) | // |
367 | (expect_dir ? RESOLVE_EXPECT_DIR : 0); |
368 | dentry_t *entry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: resolve_flags); |
369 | if (IS_ERR(ptr: entry)) |
370 | { |
371 | pr_dinfo2(vfs, "failed to resolve '%s': create=%d, r=%d, x=%d, nofollow=%d, dir=%d, truncate=%d" , path, may_create, read, exec, no_follow, expect_dir, truncate); |
372 | return ERR(ptr: entry); |
373 | } |
374 | |
375 | bool created = false; |
376 | |
377 | if (may_create && entry->inode == NULL) |
378 | { |
379 | dentry_t *parent = dentry_parent(dentry: entry); |
380 | if (!parent->inode->ops->newfile) |
381 | { |
382 | dentry_unref(dentry: entry); |
383 | return ERR_PTR(error: -EROFS); |
384 | } |
385 | |
386 | if (!parent->inode->ops->newfile(parent->inode, entry, FILE_TYPE_REGULAR, 0666)) |
387 | { |
388 | dentry_unref(dentry: entry); |
389 | return ERR_PTR(error: -EIO); // failed to create file |
390 | } |
391 | |
392 | created = true; |
393 | } |
394 | |
395 | if (!vfs_verify_permissions(file_dentry: entry, open: true, read, create: may_create, execute: exec, write)) |
396 | { |
397 | dentry_unref(dentry: entry); |
398 | return ERR_PTR(error: -EACCES); |
399 | } |
400 | |
401 | file_t *file = vfs_do_open_dentry(entry, created, read, write, exec, truncate); |
402 | if (IS_ERR(ptr: file)) |
403 | { |
404 | kfree(ptr: file); |
405 | dentry_unref(dentry: entry); |
406 | return ERR(ptr: file); |
407 | } |
408 | |
409 | return file; |
410 | } |
411 | |
412 | // public functions |
413 | file_t *vfs_do_open_dentry(dentry_t *entry, bool created, bool read, bool write, bool exec, bool truncate) |
414 | { |
415 | MOS_ASSERT(entry->inode); |
416 | MOS_UNUSED(truncate); |
417 | |
418 | file_t *file = kmalloc(file_cache); |
419 | file->dentry = entry; |
420 | |
421 | io_flags_t io_flags = IO_SEEKABLE; |
422 | |
423 | if (read) |
424 | io_flags |= IO_READABLE; |
425 | |
426 | if (write) |
427 | io_flags |= IO_WRITABLE; |
428 | |
429 | if (exec) |
430 | io_flags |= IO_EXECUTABLE; |
431 | |
432 | // only regular files are mmapable |
433 | if (entry->inode->type == FILE_TYPE_REGULAR) |
434 | io_flags |= IO_MMAPABLE; |
435 | |
436 | if (file->dentry->inode->type == FILE_TYPE_DIRECTORY) |
437 | io_init(io: &file->io, type: IO_DIR, flags: (io_flags | IO_READABLE) & ~IO_SEEKABLE, ops: &dir_io_ops); |
438 | else |
439 | io_init(io: &file->io, type: IO_FILE, flags: io_flags, ops: &file_io_ops); |
440 | |
441 | const file_ops_t *ops = file_get_ops(file); |
442 | if (ops && ops->open) |
443 | { |
444 | bool opened = ops->open(file->dentry->inode, file, created); |
445 | if (!opened) |
446 | return ERR_PTR(error: -ENOTSUP); |
447 | } |
448 | |
449 | return file; |
450 | } |
451 | |
452 | void vfs_register_filesystem(filesystem_t *fs) |
453 | { |
454 | if (vfs_find_filesystem(name: fs->name)) |
455 | mos_panic("filesystem '%s' already registered" , fs->name); |
456 | |
457 | MOS_ASSERT(list_is_empty(list_node(fs))); |
458 | |
459 | spinlock_acquire(&vfs_fs_list_lock); |
460 | list_node_append(head: &vfs_fs_list, list_node(fs)); |
461 | spinlock_release(&vfs_fs_list_lock); |
462 | |
463 | pr_dinfo2(vfs, "filesystem '%s' registered" , fs->name); |
464 | } |
465 | |
466 | long vfs_mount(const char *device, const char *path, const char *fs, const char *options) |
467 | { |
468 | filesystem_t *real_fs = vfs_find_filesystem(name: fs); |
469 | if (unlikely(real_fs == NULL)) |
470 | { |
471 | mos_warn("filesystem '%s' not found" , fs); |
472 | return -EINVAL; |
473 | } |
474 | |
475 | MOS_ASSERT_X(real_fs->mount, "filesystem '%s' does not support mounting" , real_fs->name); |
476 | |
477 | if (unlikely(strcmp(path, "/" ) == 0)) |
478 | { |
479 | // special case: mount root filesystem |
480 | if (root_dentry) |
481 | { |
482 | pr_warn("root filesystem is already mounted" ); |
483 | return -EBUSY; |
484 | } |
485 | pr_dinfo2(vfs, "mounting root filesystem '%s'..." , fs); |
486 | root_dentry = real_fs->mount(real_fs, device, options); |
487 | if (root_dentry == NULL) |
488 | { |
489 | mos_warn("failed to mount root filesystem" ); |
490 | return -EIO; |
491 | } |
492 | pr_dinfo2(vfs, "root filesystem mounted, dentry=%p" , (void *) root_dentry); |
493 | |
494 | MOS_ASSERT(root_dentry->name == NULL); |
495 | bool mounted = dentry_mount(mountpoint: root_dentry, root: root_dentry, fs: real_fs); |
496 | MOS_ASSERT(mounted); |
497 | |
498 | return 0; |
499 | } |
500 | |
501 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
502 | dentry_t *mountpoint = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR | RESOLVE_EXPECT_EXIST); |
503 | if (IS_ERR(ptr: mountpoint)) |
504 | return PTR_ERR(ptr: mountpoint); |
505 | |
506 | if (mountpoint->is_mountpoint) |
507 | { |
508 | // we don't support overlaying filesystems yet |
509 | mos_warn("mount point is already mounted" ); |
510 | dentry_unref(dentry: mountpoint); |
511 | return -ENOTSUP; |
512 | } |
513 | |
514 | // when mounting: |
515 | // mounted_root will have a reference of 1 |
516 | // the mount_point will have its reference incremented by 1 |
517 | dentry_t *mounted_root = real_fs->mount(real_fs, device, options); |
518 | if (IS_ERR(ptr: mounted_root)) |
519 | { |
520 | mos_warn("failed to mount filesystem" ); |
521 | return PTR_ERR(ptr: mounted_root); |
522 | } |
523 | |
524 | const bool mounted = dentry_mount(mountpoint, root: mounted_root, fs: real_fs); |
525 | if (unlikely(!mounted)) |
526 | { |
527 | mos_warn("failed to mount filesystem" ); |
528 | return -EIO; |
529 | } |
530 | |
531 | MOS_ASSERT_X(mountpoint->refcount == mounted_root->refcount, "mountpoint refcount=%zu, mounted_root refcount=%zu" , mountpoint->refcount, mounted_root->refcount); |
532 | pr_dinfo2(vfs, "mounted filesystem '%s' on '%s'" , fs, path); |
533 | return 0; |
534 | } |
535 | |
536 | long vfs_unmount(const char *path) |
537 | { |
538 | dentry_t *mounted_root = dentry_resolve(starting_dir: root_dentry, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR | RESOLVE_EXPECT_EXIST); |
539 | if (IS_ERR(ptr: mounted_root)) |
540 | return PTR_ERR(ptr: mounted_root); |
541 | |
542 | // the mounted root itself holds a ref, and the caller of this function |
543 | if (mounted_root->refcount != 2) |
544 | { |
545 | dentry_check_refstat(dentry: mounted_root); |
546 | mos_warn("refcount is not as expected" ); |
547 | return -EBUSY; |
548 | } |
549 | |
550 | dentry_unref(dentry: mounted_root); // release the reference held by this function |
551 | |
552 | // unmounting root filesystem |
553 | dentry_t *mountpoint = dentry_unmount(root: mounted_root); |
554 | if (!mountpoint) |
555 | { |
556 | mos_warn("failed to unmount filesystem" ); |
557 | return -EIO; |
558 | } |
559 | |
560 | MOS_ASSERT(mounted_root->refcount == mountpoint->refcount && mountpoint->refcount == 1); |
561 | if (mounted_root->superblock->fs->unmount) |
562 | mounted_root->superblock->fs->unmount(mounted_root->superblock->fs, mounted_root); |
563 | else |
564 | MOS_ASSERT(dentry_unref_one_norelease(mounted_root)); |
565 | MOS_ASSERT_X(mounted_root->refcount == 0, "fs->umount should release the last reference to the mounted root" ); |
566 | |
567 | if (mounted_root == root_dentry) |
568 | { |
569 | pr_info2("unmounted root filesystem" ); |
570 | root_dentry = NULL; |
571 | return 0; |
572 | } |
573 | |
574 | dentry_unref(dentry: mountpoint); |
575 | return 0; |
576 | } |
577 | |
578 | file_t *vfs_openat(int fd, const char *path, open_flags flags) |
579 | { |
580 | pr_dinfo2(vfs, "vfs_openat(fd=%d, path='%s', flags=%x)" , fd, path, flags); |
581 | dentry_t *basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
582 | if (IS_ERR(ptr: basedir)) |
583 | return ERR(ptr: basedir); |
584 | file_t *file = vfs_do_open(base: basedir, path, flags); |
585 | return file; |
586 | } |
587 | |
588 | long vfs_fstatat(fd_t fd, const char *path, file_stat_t *restrict statbuf, fstatat_flags flags) |
589 | { |
590 | if (flags & FSTATAT_FILE) |
591 | { |
592 | pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%p', stat=%p, flags=%x)" , fd, (void *) path, (void *) statbuf, flags); |
593 | io_t *io = process_get_fd(current_process, fd); |
594 | if (!(io_valid(io) && (io->type == IO_FILE || io->type == IO_DIR))) |
595 | return -EBADF; // io is closed, or is not a file or directory |
596 | |
597 | file_t *file = container_of(io, file_t, io); |
598 | MOS_ASSERT(file); |
599 | if (statbuf) |
600 | vfs_copy_stat(statbuf, inode: file->dentry->inode); |
601 | |
602 | return 0; |
603 | } |
604 | |
605 | pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%s', stat=%p, flags=%x)" , fd, path, (void *) statbuf, flags); |
606 | dentry_t *basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
607 | if (IS_ERR(ptr: basedir)) |
608 | return PTR_ERR(ptr: basedir); |
609 | lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_ANY_TYPE | RESOLVE_EXPECT_EXIST; |
610 | if (flags & FSTATAT_NOFOLLOW) |
611 | resolve_flags |= RESOLVE_SYMLINK_NOFOLLOW; |
612 | |
613 | dentry_t *dentry = dentry_resolve(starting_dir: basedir, root_dir: root_dentry, path, flags: resolve_flags); |
614 | if (IS_ERR(ptr: dentry)) |
615 | return PTR_ERR(ptr: dentry); |
616 | |
617 | if (statbuf) |
618 | vfs_copy_stat(statbuf, inode: dentry->inode); |
619 | dentry_unref(dentry); |
620 | return 0; |
621 | } |
622 | |
623 | size_t vfs_readlinkat(fd_t dirfd, const char *path, char *buf, size_t size) |
624 | { |
625 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
626 | dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_SYMLINK_NOFOLLOW | RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_FILE); |
627 | if (IS_ERR(ptr: dentry)) |
628 | return PTR_ERR(ptr: dentry); |
629 | |
630 | if (dentry->inode->type != FILE_TYPE_SYMLINK) |
631 | { |
632 | dentry_unref(dentry); |
633 | return -EINVAL; |
634 | } |
635 | |
636 | const size_t len = dentry->inode->ops->readlink(dentry, buf, size); |
637 | |
638 | dentry_unref(dentry); |
639 | |
640 | if (len >= size) // buffer too small |
641 | return -ENAMETOOLONG; |
642 | |
643 | return len; |
644 | } |
645 | |
646 | long vfs_symlink(const char *path, const char *target) |
647 | { |
648 | pr_dinfo2(vfs, "vfs_symlink(path='%s', target='%s')" , path, target); |
649 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
650 | dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST); |
651 | if (IS_ERR(ptr: dentry)) |
652 | return PTR_ERR(ptr: dentry); |
653 | |
654 | dentry_t *parent_dir = dentry_parent(dentry); |
655 | const bool created = parent_dir->inode->ops->symlink(parent_dir->inode, dentry, target); |
656 | |
657 | if (!created) |
658 | mos_warn("failed to create symlink '%s'" , path); |
659 | |
660 | dentry_unref(dentry); |
661 | return created ? 0 : -EIO; |
662 | } |
663 | |
664 | long vfs_mkdir(const char *path) |
665 | { |
666 | pr_dinfo2(vfs, "vfs_mkdir('%s')" , path); |
667 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
668 | dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST); |
669 | if (IS_ERR(ptr: dentry)) |
670 | return PTR_ERR(ptr: dentry); |
671 | |
672 | dentry_t *parent_dir = dentry_parent(dentry); |
673 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->mkdir == NULL) |
674 | { |
675 | dentry_unref(dentry); |
676 | return false; |
677 | } |
678 | |
679 | // TODO: use umask or something else |
680 | const bool created = parent_dir->inode->ops->mkdir(parent_dir->inode, dentry, parent_dir->inode->perm); |
681 | |
682 | if (!created) |
683 | mos_warn("failed to create directory '%s'" , path); |
684 | |
685 | dentry_unref(dentry); |
686 | return created ? 0 : -EIO; |
687 | } |
688 | |
689 | long vfs_rmdir(const char *path) |
690 | { |
691 | pr_dinfo2(vfs, "vfs_rmdir('%s')" , path); |
692 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
693 | dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_DIR); |
694 | if (IS_ERR(ptr: dentry)) |
695 | return PTR_ERR(ptr: dentry); |
696 | |
697 | dentry_t *parent_dir = dentry_parent(dentry); |
698 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->rmdir == NULL) |
699 | { |
700 | dentry_unref(dentry); |
701 | return -ENOTSUP; |
702 | } |
703 | |
704 | const bool removed = parent_dir->inode->ops->rmdir(parent_dir->inode, dentry); |
705 | |
706 | if (!removed) |
707 | mos_warn("failed to remove directory '%s'" , path); |
708 | |
709 | dentry_unref(dentry); |
710 | return removed ? 0 : -EIO; |
711 | } |
712 | |
713 | size_t vfs_list_dir(io_t *io, void *user_buf, size_t user_size) |
714 | { |
715 | pr_dinfo2(vfs, "vfs_list_dir(io=%p, buf=%p, size=%zu)" , (void *) io, (void *) user_buf, user_size); |
716 | file_t *file = container_of(io, file_t, io); |
717 | if (unlikely(file->dentry->inode->type != FILE_TYPE_DIRECTORY)) |
718 | { |
719 | mos_warn("not a directory" ); |
720 | return 0; |
721 | } |
722 | |
723 | if (file->private_data == NULL) |
724 | { |
725 | vfs_listdir_state_t *const state = file->private_data = kmalloc(sizeof(vfs_listdir_state_t)); |
726 | linked_list_init(head_node: &state->entries); |
727 | state->n_count = state->read_offset = 0; |
728 | vfs_populate_listdir_buf(dir: file->dentry, state); |
729 | } |
730 | |
731 | vfs_listdir_state_t *const state = file->private_data; |
732 | |
733 | if (state->read_offset >= state->n_count) |
734 | return 0; // no more entries |
735 | |
736 | size_t bytes_copied = 0; |
737 | size_t i = 0; |
738 | list_foreach(vfs_listdir_entry_t, entry, state->entries) |
739 | { |
740 | if (i++ < state->read_offset) |
741 | continue; // skip the entries we have already read |
742 | |
743 | if (state->read_offset >= state->n_count) |
744 | break; |
745 | |
746 | const size_t entry_size = sizeof(ino_t) + sizeof(off_t) + sizeof(short) + sizeof(char) + entry->name_len + 1; // +1 for the null terminator |
747 | if (bytes_copied + entry_size > user_size) |
748 | break; |
749 | |
750 | struct dirent *dirent = (struct dirent *) (((char *) user_buf) + bytes_copied); |
751 | dirent->d_ino = entry->ino; |
752 | dirent->d_type = entry->type; |
753 | dirent->d_reclen = entry_size; |
754 | dirent->d_off = entry_size - 1; |
755 | memcpy(dest: dirent->d_name, src: entry->name, n: entry->name_len); |
756 | dirent->d_name[entry->name_len] = '\0'; |
757 | bytes_copied += entry_size; |
758 | state->read_offset++; |
759 | } |
760 | |
761 | return bytes_copied; |
762 | } |
763 | |
764 | long vfs_chdirat(fd_t dirfd, const char *path) |
765 | { |
766 | pr_dinfo2(vfs, "vfs_chdir('%s')" , path); |
767 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
768 | dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_DIR); |
769 | if (IS_ERR(ptr: dentry)) |
770 | return PTR_ERR(ptr: dentry); |
771 | |
772 | dentry_t *old_cwd = dentry_from_fd(AT_FDCWD); |
773 | if (old_cwd) |
774 | dentry_unref(dentry: old_cwd); |
775 | |
776 | current_process->working_directory = dentry; |
777 | return 0; |
778 | } |
779 | |
780 | ssize_t vfs_getcwd(char *buf, size_t size) |
781 | { |
782 | dentry_t *cwd = dentry_from_fd(AT_FDCWD); |
783 | if (IS_ERR(ptr: cwd)) |
784 | return PTR_ERR(ptr: cwd); |
785 | |
786 | return dentry_path(dentry: cwd, root: root_dentry, buf, size); |
787 | } |
788 | |
789 | long vfs_fchmodat(fd_t fd, const char *path, int perm, int flags) |
790 | { |
791 | pr_dinfo2(vfs, "vfs_fchmodat(fd=%d, path='%s', perm=%o, flags=%x)" , fd, path, perm, flags); |
792 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
793 | dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_ANY_TYPE); |
794 | if (IS_ERR(ptr: dentry)) |
795 | return PTR_ERR(ptr: dentry); |
796 | |
797 | // TODO: check if the underlying filesystem supports chmod, and is not read-only |
798 | dentry->inode->perm = perm; |
799 | dentry_unref(dentry); |
800 | return 0; |
801 | } |
802 | |
803 | long vfs_unlinkat(fd_t dirfd, const char *path) |
804 | { |
805 | pr_dinfo2(vfs, "vfs_unlinkat(dirfd=%d, path='%s')" , dirfd, path); |
806 | dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
807 | dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_FILE | RESOLVE_SYMLINK_NOFOLLOW); |
808 | if (IS_ERR(ptr: dentry)) |
809 | return PTR_ERR(ptr: dentry); |
810 | |
811 | dentry_t *parent_dir = dentry_parent(dentry); |
812 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->unlink == NULL) |
813 | { |
814 | dentry_unref(dentry); |
815 | return -ENOTSUP; |
816 | } |
817 | |
818 | if (!inode_unlink(dir: parent_dir->inode, dentry)) |
819 | { |
820 | dentry_unref(dentry); |
821 | return -EIO; |
822 | } |
823 | |
824 | dentry_unref(dentry); // it won't release dentry because dentry->inode is still valid |
825 | dentry_detach(dentry); |
826 | dentry_try_release(dentry); |
827 | return 0; |
828 | } |
829 | |
830 | long vfs_fsync(io_t *io, bool sync_metadata, off_t start, off_t end) |
831 | { |
832 | pr_dinfo2(vfs, "vfs_fsync(io=%p, sync_metadata=%d, start=%ld, end=%ld)" , (void *) io, sync_metadata, start, end); |
833 | file_t *file = container_of(io, file_t, io); |
834 | |
835 | const off_t nbytes = end - start; |
836 | const off_t npages = ALIGN_UP_TO_PAGE(nbytes) / MOS_PAGE_SIZE; |
837 | const off_t pgoffset = start / MOS_PAGE_SIZE; |
838 | |
839 | long ret = do_pagecache_flush(file, pgoff: pgoffset, npages); |
840 | if (ret < 0) |
841 | return ret; |
842 | |
843 | if (sync_metadata) |
844 | { |
845 | ret = do_sync_inode(file); |
846 | if (ret < 0) |
847 | return ret; |
848 | } |
849 | |
850 | return ret; |
851 | } |
852 | |
853 | // ! sysfs support |
854 | |
855 | static bool vfs_sysfs_filesystems(sysfs_file_t *f) |
856 | { |
857 | list_foreach(filesystem_t, fs, vfs_fs_list) |
858 | { |
859 | sysfs_printf(file: f, fmt: "%s\n" , fs->name); |
860 | } |
861 | |
862 | return true; |
863 | } |
864 | |
865 | static bool vfs_sysfs_mountpoints(sysfs_file_t *f) |
866 | { |
867 | char pathbuf[MOS_PATH_MAX_LENGTH]; |
868 | list_foreach(mount_t, mp, vfs_mountpoint_list) |
869 | { |
870 | dentry_path(dentry: mp->mountpoint, root: root_dentry, buf: pathbuf, size: sizeof(pathbuf)); |
871 | sysfs_printf(file: f, fmt: "%-20s %-10s\n" , pathbuf, mp->fs->name); |
872 | } |
873 | |
874 | return true; |
875 | } |
876 | |
877 | static void vfs_sysfs_dentry_stats_stat_receiver(int depth, const dentry_t *dentry, bool mountroot, void *data) |
878 | { |
879 | sysfs_file_t *file = data; |
880 | sysfs_printf(file, fmt: "%*s%s: refcount=%zu%s\n" , // |
881 | depth * 4, // |
882 | "" , // |
883 | dentry_name(dentry), // |
884 | dentry->refcount, // |
885 | mountroot ? " (mount root)" : (dentry->is_mountpoint ? " (mountpoint)" : "" ) // |
886 | ); |
887 | } |
888 | |
889 | static bool vfs_sysfs_dentry_stats(sysfs_file_t *f) |
890 | { |
891 | dentry_dump_refstat(dentry: root_dentry, receiver: vfs_sysfs_dentry_stats_stat_receiver, data: f); |
892 | return true; |
893 | } |
894 | |
// Read-only sysfs items exported by the VFS; each one is backed by the
// show-function named next to it.
static sysfs_item_t vfs_sysfs_items[] = {
    SYSFS_RO_ITEM("filesystems", vfs_sysfs_filesystems),   // names of registered filesystems
    SYSFS_RO_ITEM("mount", vfs_sysfs_mountpoints),         // mountpoint path + filesystem name
    SYSFS_RO_ITEM("dentry_stats", vfs_sysfs_dentry_stats), // dentry reference counts
};

// Register the items above under the name "vfs".
SYSFS_AUTOREGISTER(vfs, vfs_sysfs_items);
902 | |