| 1 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 2 | |
| 3 | #include "mos/assert.hpp" |
| 4 | #include "mos/device/timer.hpp" |
| 5 | #include "mos/filesystem/inode.hpp" |
| 6 | #include "mos/filesystem/mount.hpp" |
| 7 | #include "mos/filesystem/page_cache.hpp" |
| 8 | #include "mos/filesystem/sysfs/sysfs.hpp" |
| 9 | #include "mos/filesystem/sysfs/sysfs_autoinit.hpp" |
| 10 | #include "mos/mm/mm.hpp" |
| 11 | #include "mos/mm/mmstat.hpp" |
| 12 | |
| 13 | #include <algorithm> |
| 14 | #include <dirent.h> |
| 15 | #include <errno.h> |
| 16 | #include <mos/filesystem/dentry.hpp> |
| 17 | #include <mos/filesystem/fs_types.h> |
| 18 | #include <mos/filesystem/vfs.hpp> |
| 19 | #include <mos/filesystem/vfs_types.hpp> |
| 20 | #include <mos/io/io.hpp> |
| 21 | #include <mos/lib/structures/list.hpp> |
| 22 | #include <mos/lib/structures/tree.hpp> |
| 23 | #include <mos/lib/sync/spinlock.hpp> |
| 24 | #include <mos/mos_global.h> |
| 25 | #include <mos/platform/platform.hpp> |
| 26 | #include <mos/syslog/printk.hpp> |
| 27 | #include <mos/tasks/process.hpp> |
| 28 | #include <mos/types.hpp> |
| 29 | #include <mos_stdlib.hpp> |
| 30 | #include <mos_string.hpp> |
| 31 | |
| 32 | static list_head vfs_fs_list; // filesystem_t |
| 33 | static spinlock_t vfs_fs_list_lock; |
| 34 | |
| 35 | dentry_t *root_dentry = NULL; |
| 36 | |
| 37 | static long do_pagecache_flush(FsBaseFile *file, off_t pgoff, size_t npages) |
| 38 | { |
| 39 | dInfo2<vfs> << "vfs: flushing page cache for file " << (void *) file; |
| 40 | |
| 41 | mutex_acquire(mutex: &file->dentry->inode->cache.lock); |
| 42 | long ret = 0; |
| 43 | if (pgoff == 0 && npages == (size_t) -1) |
| 44 | ret = pagecache_flush_or_drop_all(icache: &file->dentry->inode->cache, drop_page: false); |
| 45 | else |
| 46 | ret = pagecache_flush_or_drop(icache: &file->dentry->inode->cache, pgoff, npages, drop_page: false); |
| 47 | |
| 48 | mutex_release(mutex: &file->dentry->inode->cache.lock); |
| 49 | return ret; |
| 50 | } |
| 51 | |
| 52 | static long do_sync_inode(FsBaseFile *file) |
| 53 | { |
| 54 | const superblock_ops_t *ops = file->dentry->inode->superblock->ops; |
| 55 | if (ops && ops->sync_inode) |
| 56 | return ops->sync_inode(file->dentry->inode); |
| 57 | |
| 58 | return 0; |
| 59 | } |
| 60 | |
| 61 | // BEGIN: filesystem's IO operations |
| 62 | void FsFile::on_closed() |
| 63 | { |
| 64 | if (io_type == IO_FILE && io_flags.test(b: IO_WRITABLE)) // only flush if the file is writable |
| 65 | { |
| 66 | do_pagecache_flush(file: this, pgoff: 0, npages: (off_t) -1); |
| 67 | do_sync_inode(file: this); |
| 68 | } |
| 69 | |
| 70 | dentry_unref(dentry: this->dentry); |
| 71 | |
| 72 | if (io_type == IO_FILE) |
| 73 | { |
| 74 | const file_ops_t *file_ops = get_ops(); |
| 75 | if (file_ops) |
| 76 | { |
| 77 | if (file_ops->release) |
| 78 | file_ops->release(this); |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | delete this; |
| 83 | } |
| 84 | |
| 85 | size_t FsDir::on_read(void *buf, size_t bufSize) |
| 86 | { |
| 87 | return vfs_list_dir(io: this, buf, size: bufSize); |
| 88 | } |
| 89 | |
| 90 | void FsDir::on_closed() |
| 91 | { |
| 92 | if (this->private_data) |
| 93 | { |
| 94 | vfs_listdir_state_t *state = static_cast<vfs_listdir_state_t *>(this->private_data); |
| 95 | list_foreach(vfs_listdir_entry_t, entry, state->entries) |
| 96 | { |
| 97 | list_remove(entry); |
| 98 | delete entry; |
| 99 | } |
| 100 | |
| 101 | delete state; |
| 102 | this->private_data = NULL; |
| 103 | } |
| 104 | |
| 105 | dentry_unref(dentry: this->dentry); |
| 106 | |
| 107 | if (io_type == IO_FILE) |
| 108 | { |
| 109 | const file_ops_t *file_ops = get_ops(); |
| 110 | if (file_ops) |
| 111 | { |
| 112 | if (file_ops->release) |
| 113 | file_ops->release(this); |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | delete this; |
| 118 | } |
| 119 | |
| 120 | size_t FsFile::on_read(void *buf, size_t count) |
| 121 | { |
| 122 | const file_ops_t *const file_ops = get_ops(); |
| 123 | if (!file_ops || !file_ops->read) |
| 124 | return 0; |
| 125 | |
| 126 | spinlock_acquire(&offset_lock); |
| 127 | size_t ret = file_ops->read(this, buf, count, this->offset); |
| 128 | if (IS_ERR_VALUE(ret)) |
| 129 | ; // do nothing |
| 130 | else if (ret != (size_t) -1) |
| 131 | this->offset += ret; |
| 132 | spinlock_release(&offset_lock); |
| 133 | return ret; |
| 134 | } |
| 135 | |
| 136 | size_t FsFile::on_write(const void *buf, size_t count) |
| 137 | { |
| 138 | const file_ops_t *const file_ops = get_ops(); |
| 139 | if (!file_ops || !file_ops->write) |
| 140 | return 0; |
| 141 | |
| 142 | spinlock_acquire(&offset_lock); |
| 143 | size_t ret = file_ops->write(this, buf, count, this->offset); |
| 144 | if (!IS_ERR_VALUE(ret)) |
| 145 | this->offset += ret; |
| 146 | spinlock_release(&offset_lock); |
| 147 | return ret; |
| 148 | } |
| 149 | |
| 150 | off_t FsFile::on_seek(off_t offset, io_seek_whence_t whence) |
| 151 | { |
| 152 | const file_ops_t *const ops = get_ops(); |
| 153 | if (ops->seek) |
| 154 | return ops->seek(this, offset, whence); // use the filesystem's lseek if it exists |
| 155 | |
| 156 | spinlock_acquire(&offset_lock); |
| 157 | |
| 158 | switch (whence) |
| 159 | { |
| 160 | case IO_SEEK_SET: |
| 161 | { |
| 162 | this->offset = std::max(a: offset, b: 0l); |
| 163 | break; |
| 164 | } |
| 165 | case IO_SEEK_CURRENT: |
| 166 | { |
| 167 | off_t new_offset = this->offset + offset; |
| 168 | new_offset = std::max(a: new_offset, b: 0l); |
| 169 | this->offset = new_offset; |
| 170 | break; |
| 171 | } |
| 172 | case IO_SEEK_END: |
| 173 | { |
| 174 | off_t new_offset = this->dentry->inode->size + offset; |
| 175 | new_offset = std::max(a: new_offset, b: 0l); |
| 176 | this->offset = new_offset; |
| 177 | break; |
| 178 | } |
| 179 | case IO_SEEK_DATA: dInfo2<vfs> << "vfs: IO_SEEK_DATA is not supported" ; break; |
| 180 | case IO_SEEK_HOLE: dInfo2<vfs> << "vfs: IO_SEEK_HOLE is not supported" ; break; |
| 181 | }; |
| 182 | |
| 183 | spinlock_release(&offset_lock); |
| 184 | return this->offset; |
| 185 | } |
| 186 | |
| 187 | static vmfault_result_t vfs_fault_handler(vmap_t *vmap, ptr_t fault_addr, pagefault_t *info) |
| 188 | { |
| 189 | MOS_ASSERT(vmap->io); |
| 190 | FsBaseFile *file = static_cast<FsBaseFile *>(vmap->io); |
| 191 | const size_t fault_pgoffset = (vmap->io_offset + ALIGN_DOWN_TO_PAGE(fault_addr) - vmap->vaddr) / MOS_PAGE_SIZE; |
| 192 | |
| 193 | mutex_acquire(mutex: &file->dentry->inode->cache.lock); // lock the inode cache |
| 194 | auto pagecache_page = pagecache_get_page_for_read(cache: &file->dentry->inode->cache, pgoff: fault_pgoffset); |
| 195 | mutex_release(mutex: &file->dentry->inode->cache.lock); |
| 196 | |
| 197 | if (pagecache_page.isErr()) |
| 198 | return VMFAULT_CANNOT_HANDLE; |
| 199 | |
| 200 | // ! mm subsystem has verified that this vmap can be written to, but in the page table it's marked as read-only |
| 201 | // * currently, only CoW pages have this property, we treat this as a CoW page |
| 202 | if (info->is_present && info->is_write) |
| 203 | { |
| 204 | if (pagecache_page == info->faulting_page) |
| 205 | vmap_stat_dec(vmap, pagecache); // the faulting page is a pagecache page |
| 206 | else |
| 207 | vmap_stat_dec(vmap, cow); // the faulting page is a COW page |
| 208 | vmap_stat_inc(vmap, regular); |
| 209 | return mm_resolve_cow_fault(vmap, fault_addr, info); // resolve by copying data page into prevate page |
| 210 | } |
| 211 | |
| 212 | info->backing_page = pagecache_page.get(); |
| 213 | if (vmap->type == VMAP_TYPE_PRIVATE) |
| 214 | { |
| 215 | if (info->is_write) |
| 216 | { |
| 217 | vmap_stat_inc(vmap, regular); |
| 218 | // present pages are handled above |
| 219 | MOS_ASSERT(!info->is_present); |
| 220 | return VMFAULT_COPY_BACKING_PAGE; // copy and (also) map the backing page |
| 221 | } |
| 222 | else |
| 223 | { |
| 224 | vmap_stat_inc(vmap, pagecache); |
| 225 | vmap_stat_inc(vmap, cow); |
| 226 | return VMFAULT_MAP_BACKING_PAGE_RO; |
| 227 | } |
| 228 | } |
| 229 | else |
| 230 | { |
| 231 | vmap_stat_inc(vmap, pagecache); |
| 232 | vmap_stat_inc(vmap, regular); |
| 233 | return VMFAULT_MAP_BACKING_PAGE; |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | bool FsFile::on_mmap(vmap_t *vmap, off_t offset) |
| 238 | { |
| 239 | const file_ops_t *const file_ops = get_ops(); |
| 240 | |
| 241 | MOS_ASSERT(!vmap->on_fault); // there should be no fault handler set |
| 242 | vmap->on_fault = vfs_fault_handler; |
| 243 | |
| 244 | if (file_ops->mmap) |
| 245 | return file_ops->mmap(this, vmap, offset); |
| 246 | |
| 247 | return true; |
| 248 | } |
| 249 | |
| 250 | bool FsFile::on_munmap(vmap_t *vmap, bool *unmapped) |
| 251 | { |
| 252 | const file_ops_t *const file_ops = get_ops(); |
| 253 | |
| 254 | if (file_ops->munmap) |
| 255 | return file_ops->munmap(this, vmap, unmapped); |
| 256 | |
| 257 | return true; |
| 258 | } |
| 259 | |
| 260 | // static const io_op_t file_io_ops = { |
| 261 | // .read = vfs_io_ops_read, |
| 262 | // .write = vfs_io_ops_write, |
| 263 | // .close = vfs_io_ops_close, |
| 264 | // .seek = vfs_io_ops_seek, |
| 265 | // .mmap = vfs_io_ops_mmap, |
| 266 | // .munmap = vfs_io_ops_munmap, |
| 267 | // }; |
| 268 | |
| 269 | // static const io_op_t dir_io_ops = { |
| 270 | // .read = vfs_list_dir, |
| 271 | // .close = vfs_io_ops_close_dir, |
| 272 | // }; |
| 273 | |
| 274 | // END: filesystem's IO operations |
| 275 | |
| 276 | static __used void vfs_flusher_entry(void *arg) |
| 277 | { |
| 278 | MOS_UNUSED(arg); |
| 279 | while (true) |
| 280 | { |
| 281 | timer_msleep(ms: 10 * 1000); |
| 282 | // pagecache_flush_all(); |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | static void vfs_flusher_init(void) |
| 287 | { |
| 288 | // kthread_create(vfs_flusher_entry, NULL, "vfs_flusher"); |
| 289 | } |
| 290 | MOS_INIT(KTHREAD, vfs_flusher_init); |
| 291 | |
| 292 | static void vfs_copy_stat(file_stat_t *statbuf, inode_t *inode) |
| 293 | { |
| 294 | statbuf->ino = inode->ino; |
| 295 | statbuf->type = inode->type; |
| 296 | statbuf->perm = inode->perm; |
| 297 | statbuf->size = inode->size; |
| 298 | statbuf->uid = inode->uid; |
| 299 | statbuf->gid = inode->gid; |
| 300 | statbuf->sticky = inode->sticky; |
| 301 | statbuf->suid = inode->suid; |
| 302 | statbuf->sgid = inode->sgid; |
| 303 | statbuf->nlinks = inode->nlinks; |
| 304 | statbuf->accessed = inode->accessed; |
| 305 | statbuf->modified = inode->modified; |
| 306 | statbuf->created = inode->created; |
| 307 | } |
| 308 | |
| 309 | static filesystem_t *vfs_find_filesystem(mos::string_view name) |
| 310 | { |
| 311 | SpinLocker lock(&vfs_fs_list_lock); |
| 312 | list_foreach(filesystem_t, fs, vfs_fs_list) |
| 313 | { |
| 314 | if (fs->name == name) |
| 315 | return fs; |
| 316 | } |
| 317 | |
| 318 | return nullptr; |
| 319 | } |
| 320 | |
| 321 | static bool vfs_verify_permissions(dentry_t &file_dentry, bool open, bool read, bool create, bool execute, bool write) |
| 322 | { |
| 323 | MOS_ASSERT(file_dentry.inode); |
| 324 | const file_perm_t file_perm = file_dentry.inode->perm; |
| 325 | |
| 326 | // TODO: we are treating all users as root for now, only checks for execute permission |
| 327 | MOS_UNUSED(open); |
| 328 | MOS_UNUSED(read); |
| 329 | MOS_UNUSED(create); |
| 330 | MOS_UNUSED(write); |
| 331 | |
| 332 | if (execute && !(file_perm & PERM_EXEC)) |
| 333 | return false; // execute permission denied |
| 334 | |
| 335 | return true; |
| 336 | } |
| 337 | |
| 338 | static PtrResult<FsBaseFile> vfs_do_open(dentry_t *base, mos::string_view path, OpenFlags flags) |
| 339 | { |
| 340 | if (base == NULL) |
| 341 | return -EINVAL; |
| 342 | |
| 343 | const bool may_create = flags & OPEN_CREATE; |
| 344 | const bool read = flags & OPEN_READ; |
| 345 | const bool write = flags & OPEN_WRITE; |
| 346 | const bool exec = flags & OPEN_EXECUTE; |
| 347 | const bool no_follow = flags & OPEN_NO_FOLLOW; |
| 348 | const bool expect_dir = flags & OPEN_DIR; |
| 349 | const bool truncate = flags & OPEN_TRUNCATE; |
| 350 | |
| 351 | LastSegmentResolveFlags resolve_flags = RESOLVE_EXPECT_FILE; |
| 352 | if (no_follow) |
| 353 | resolve_flags |= RESOLVE_SYMLINK_NOFOLLOW; |
| 354 | if (may_create) |
| 355 | resolve_flags |= RESOLVE_EXPECT_ANY_EXIST; |
| 356 | if (expect_dir) |
| 357 | resolve_flags |= RESOLVE_EXPECT_DIR; |
| 358 | |
| 359 | auto entry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: resolve_flags); |
| 360 | if (entry.isErr()) |
| 361 | { |
| 362 | dInfo2<vfs> << "failed to resolve '" << path << "': create=" << may_create << ", r=" << read << ", x=" << exec << ", nofollow=" << no_follow |
| 363 | << ", dir=" << expect_dir << ", truncate=" << truncate; |
| 364 | return entry.getErr(); |
| 365 | } |
| 366 | |
| 367 | bool created = false; |
| 368 | |
| 369 | if (may_create && entry->inode == NULL) |
| 370 | { |
| 371 | auto parent = dentry_parent(dentry: *entry); |
| 372 | if (!parent->inode->ops->newfile) |
| 373 | { |
| 374 | dentry_unref(dentry: entry.get()); |
| 375 | return -EROFS; |
| 376 | } |
| 377 | |
| 378 | if (!parent->inode->ops->newfile(parent->inode, entry.get(), FILE_TYPE_REGULAR, 0666)) |
| 379 | { |
| 380 | dentry_unref(dentry: entry.get()); |
| 381 | return -EIO; // failed to create file |
| 382 | } |
| 383 | |
| 384 | created = true; |
| 385 | } |
| 386 | |
| 387 | if (!vfs_verify_permissions(file_dentry&: *entry, open: true, read, create: may_create, execute: exec, write)) |
| 388 | { |
| 389 | dentry_unref(dentry: entry.get()); |
| 390 | return -EACCES; |
| 391 | } |
| 392 | |
| 393 | auto file = vfs_do_open_dentry(entry: entry.get(), created, read, write, exec, truncate); |
| 394 | if (file.isErr()) |
| 395 | { |
| 396 | dentry_unref(dentry: entry.get()); |
| 397 | return file.getErr(); |
| 398 | } |
| 399 | |
| 400 | return file; |
| 401 | } |
| 402 | |
| 403 | mos::string FsBaseFile::name() const |
| 404 | { |
| 405 | return dentry_path(dentry, root: root_dentry).value_or(u: "<unknown>" ); |
| 406 | } |
| 407 | |
| 408 | // public functions |
| 409 | PtrResult<FsBaseFile> vfs_do_open_dentry(dentry_t *dentry, bool created, bool read, bool write, bool exec, bool truncate) |
| 410 | { |
| 411 | MOS_ASSERT(dentry->inode); |
| 412 | MOS_UNUSED(truncate); |
| 413 | |
| 414 | Flags io_flags = IO_SEEKABLE; |
| 415 | |
| 416 | if (read) |
| 417 | io_flags |= IO_READABLE; |
| 418 | |
| 419 | if (write) |
| 420 | io_flags |= IO_WRITABLE; |
| 421 | |
| 422 | if (exec) |
| 423 | io_flags |= IO_EXECUTABLE; |
| 424 | |
| 425 | // only regular files are mmapable |
| 426 | if (dentry->inode->type == FILE_TYPE_REGULAR) |
| 427 | io_flags |= IO_MMAPABLE; |
| 428 | |
| 429 | FsBaseFile *file = nullptr; |
| 430 | |
| 431 | if (dentry->inode->type == FILE_TYPE_DIRECTORY) |
| 432 | file = mos::create<FsDir>(args: (io_flags | IO_READABLE).erase(b: IO_SEEKABLE), args&: dentry); |
| 433 | else |
| 434 | file = mos::create<FsFile>(args&: io_flags, args&: dentry); |
| 435 | |
| 436 | const file_ops_t *ops = file->get_ops(); |
| 437 | if (ops && ops->open) |
| 438 | { |
| 439 | bool opened = ops->open(file->dentry->inode, file, created); |
| 440 | if (!opened) |
| 441 | { |
| 442 | delete file; |
| 443 | return -ENOTSUP; |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | return file; |
| 448 | } |
| 449 | |
| 450 | void vfs_register_filesystem(filesystem_t *fs) |
| 451 | { |
| 452 | if (vfs_find_filesystem(name: fs->name)) |
| 453 | mos_panic("filesystem '%s' already registered" , fs->name.c_str()); |
| 454 | |
| 455 | MOS_ASSERT(list_is_empty(list_node(fs))); |
| 456 | |
| 457 | spinlock_acquire(&vfs_fs_list_lock); |
| 458 | list_node_append(head: &vfs_fs_list, list_node(fs)); |
| 459 | spinlock_release(&vfs_fs_list_lock); |
| 460 | |
| 461 | dInfo2<vfs> << "filesystem '" << fs->name << "' registered" ; |
| 462 | } |
| 463 | |
| 464 | PtrResult<void> vfs_mount(const char *device, const char *path, const char *fs, const char *options) |
| 465 | { |
| 466 | filesystem_t *real_fs = vfs_find_filesystem(name: fs); |
| 467 | if (unlikely(real_fs == NULL)) |
| 468 | { |
| 469 | mos_warn("filesystem '%s' not found" , fs); |
| 470 | return -EINVAL; |
| 471 | } |
| 472 | |
| 473 | MOS_ASSERT_X(real_fs->mount, "filesystem '%s' does not support mounting" , real_fs->name.c_str()); |
| 474 | |
| 475 | if (unlikely(strcmp(path, "/" ) == 0)) |
| 476 | { |
| 477 | // special case: mount root filesystem |
| 478 | if (root_dentry) |
| 479 | { |
| 480 | mWarn << "root filesystem is already mounted" ; |
| 481 | return -EBUSY; |
| 482 | } |
| 483 | dInfo2<vfs> << "mounting root filesystem '" << fs << "'..." ; |
| 484 | const auto mountResult = real_fs->mount(real_fs, device, options); |
| 485 | if (mountResult.isErr()) |
| 486 | { |
| 487 | mWarn << "failed to mount root filesystem" ; |
| 488 | return -EIO; |
| 489 | } |
| 490 | else |
| 491 | { |
| 492 | root_dentry = mountResult.get(); |
| 493 | } |
| 494 | |
| 495 | dInfo2<vfs> << "root filesystem mounted, dentry=" << (void *) root_dentry; |
| 496 | |
| 497 | MOS_ASSERT(root_dentry->name.empty()); |
| 498 | bool mounted = dentry_mount(mountpoint: root_dentry, root: root_dentry, fs: real_fs); |
| 499 | MOS_ASSERT(mounted); |
| 500 | |
| 501 | return 0; |
| 502 | } |
| 503 | |
| 504 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
| 505 | if (base.isErr()) |
| 506 | return base.getErr(); |
| 507 | |
| 508 | auto mpRoot = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR | RESOLVE_EXPECT_EXIST); |
| 509 | if (mpRoot.isErr()) |
| 510 | return mpRoot.getErr(); |
| 511 | |
| 512 | if (mpRoot->is_mountpoint) |
| 513 | { |
| 514 | // we don't support overlaying filesystems yet |
| 515 | mWarn << "mount point is already mounted" ; |
| 516 | dentry_unref(dentry: mpRoot.get()); |
| 517 | return -ENOTSUP; |
| 518 | } |
| 519 | |
| 520 | // when mounting: |
| 521 | // mounted_root will have a reference of 1 |
| 522 | // the mount_point will have its reference incremented by 1 |
| 523 | auto mounted_root = real_fs->mount(real_fs, device, options); |
| 524 | if (mounted_root.isErr()) |
| 525 | { |
| 526 | mWarn << "failed to mount filesystem" ; |
| 527 | return mounted_root.getErr(); |
| 528 | } |
| 529 | |
| 530 | const bool mounted = dentry_mount(mountpoint: mpRoot.get(), root: mounted_root.get(), fs: real_fs); |
| 531 | if (unlikely(!mounted)) |
| 532 | { |
| 533 | mWarn << "failed to mount filesystem" ; |
| 534 | return -EIO; |
| 535 | } |
| 536 | |
| 537 | MOS_ASSERT_X(mpRoot->refcount == mounted_root->refcount, "mountpoint refcount=%zu, mounted_root refcount=%zu" , mpRoot->refcount.load(), |
| 538 | mounted_root->refcount.load()); |
| 539 | dInfo2<vfs> << "mounted filesystem '" << fs << "' on '" << path << "'" ; |
| 540 | return 0; |
| 541 | } |
| 542 | |
| 543 | long vfs_unmount(const char *path) |
| 544 | { |
| 545 | auto mounted_root = dentry_resolve(starting_dir: root_dentry, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR | RESOLVE_EXPECT_EXIST); |
| 546 | if (mounted_root.isErr()) |
| 547 | return mounted_root.getErr(); |
| 548 | |
| 549 | // the mounted root itself holds a ref, and the caller of this function |
| 550 | if (mounted_root->refcount != 2) |
| 551 | { |
| 552 | dentry_check_refstat(dentry: mounted_root.get()); |
| 553 | mWarn << "refcount is not as expected" ; |
| 554 | return -EBUSY; |
| 555 | } |
| 556 | |
| 557 | dentry_unref(dentry: mounted_root.get()); // release the reference held by this function |
| 558 | |
| 559 | // unmounting root filesystem |
| 560 | auto mountpoint = dentry_unmount(root: mounted_root.get()); |
| 561 | if (!mountpoint) |
| 562 | { |
| 563 | mWarn << "failed to unmount filesystem" ; |
| 564 | return -EIO; |
| 565 | } |
| 566 | |
| 567 | MOS_ASSERT(mounted_root->refcount == mountpoint->refcount && mountpoint->refcount == 1); |
| 568 | if (mounted_root->superblock->fs->unmount) |
| 569 | mounted_root->superblock->fs->unmount(mounted_root->superblock->fs, mounted_root.get()); |
| 570 | else |
| 571 | MOS_ASSERT(dentry_unref_one_norelease(mounted_root.get())); |
| 572 | MOS_ASSERT_X(mounted_root->refcount == 0, "fs->umount should release the last reference to the mounted root" ); |
| 573 | |
| 574 | if (mounted_root == root_dentry) |
| 575 | { |
| 576 | dInfo2<vfs> << "unmounted root filesystem" ; |
| 577 | root_dentry = NULL; |
| 578 | return 0; |
| 579 | } |
| 580 | |
| 581 | dentry_unref(dentry: mountpoint); |
| 582 | return 0; |
| 583 | } |
| 584 | |
| 585 | PtrResult<FsBaseFile> vfs_openat(int fd, mos::string_view path, OpenFlags flags) |
| 586 | { |
| 587 | dInfo2<vfs> << "vfs_openat(fd=" << fd << ", path='" << path << "', flags=" << flags << ")" ; |
| 588 | auto basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
| 589 | if (basedir.isErr()) |
| 590 | return basedir.getErr(); |
| 591 | |
| 592 | auto file = vfs_do_open(base: basedir.get(), path, flags); |
| 593 | return file; |
| 594 | } |
| 595 | |
| 596 | long vfs_fstatat(fd_t fd, const char *path, file_stat_t *__restrict statbuf, FStatAtFlags flags) |
| 597 | { |
| 598 | if (flags & FSTATAT_FILE) |
| 599 | { |
| 600 | dInfo2<vfs> << "vfs_fstatat(fd=" << fd << ", path=" << (void *) path << ", stat=" << (void *) statbuf << ", flags=" << flags << ")" ; |
| 601 | IO *io = process_get_fd(current_process, fd); |
| 602 | if (!(IO::IsValid(io) && (io->io_type == IO_FILE || io->io_type == IO_DIR))) |
| 603 | return -EBADF; // io is closed, or is not a file or directory |
| 604 | |
| 605 | FsBaseFile *file = static_cast<FsBaseFile *>(io); |
| 606 | MOS_ASSERT(file); |
| 607 | if (statbuf) |
| 608 | vfs_copy_stat(statbuf, inode: file->dentry->inode); |
| 609 | |
| 610 | return 0; |
| 611 | } |
| 612 | |
| 613 | dInfo2<vfs> << "vfs_fstatat(fd=" << fd << ", path='" << path << "', stat=" << (void *) statbuf << ", flags=" << flags << ")" ; |
| 614 | auto basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
| 615 | if (basedir.isErr()) |
| 616 | return basedir.getErr(); |
| 617 | |
| 618 | LastSegmentResolveFlags resolve_flags = RESOLVE_EXPECT_ANY_TYPE | RESOLVE_EXPECT_EXIST; |
| 619 | if (flags & FSTATAT_NOFOLLOW) |
| 620 | resolve_flags |= RESOLVE_SYMLINK_NOFOLLOW; |
| 621 | |
| 622 | auto dentry = dentry_resolve(starting_dir: basedir.get(), root_dir: root_dentry, path, flags: resolve_flags); |
| 623 | if (dentry.isErr()) |
| 624 | return dentry.getErr(); |
| 625 | |
| 626 | if (statbuf) |
| 627 | vfs_copy_stat(statbuf, inode: dentry->inode); |
| 628 | dentry_unref(dentry: dentry.get()); |
| 629 | return 0; |
| 630 | } |
| 631 | |
| 632 | size_t vfs_readlinkat(fd_t dirfd, const char *path, char *buf, size_t size) |
| 633 | { |
| 634 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
| 635 | if (base.isErr()) |
| 636 | return base.getErr(); |
| 637 | |
| 638 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_SYMLINK_NOFOLLOW | RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_FILE); |
| 639 | if (dentry.isErr()) |
| 640 | return dentry.getErr(); |
| 641 | |
| 642 | if (dentry->inode->type != FILE_TYPE_SYMLINK) |
| 643 | { |
| 644 | dentry_unref(dentry: dentry.get()); |
| 645 | return -EINVAL; |
| 646 | } |
| 647 | |
| 648 | const size_t len = dentry->inode->ops->readlink(dentry.get(), buf, size); |
| 649 | |
| 650 | dentry_unref(dentry: dentry.get()); |
| 651 | |
| 652 | if (len >= size) // buffer too small |
| 653 | return -ENAMETOOLONG; |
| 654 | |
| 655 | return len; |
| 656 | } |
| 657 | |
| 658 | long vfs_symlink(const char *path, const char *target) |
| 659 | { |
| 660 | dInfo2<vfs> << "vfs_symlink(path='" << path << "', target='" << target << "')" ; |
| 661 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
| 662 | if (base.isErr()) |
| 663 | return base.getErr(); |
| 664 | |
| 665 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST); |
| 666 | if (dentry.isErr()) |
| 667 | return dentry.getErr(); |
| 668 | |
| 669 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
| 670 | const bool created = parent_dir->inode->ops->symlink(parent_dir->inode, dentry.get(), target); |
| 671 | |
| 672 | if (!created) |
| 673 | mos_warn("failed to create symlink '%s'" , path); |
| 674 | |
| 675 | dentry_unref(dentry: dentry.get()); |
| 676 | return created ? 0 : -EIO; |
| 677 | } |
| 678 | |
| 679 | PtrResult<void> vfs_mkdir(const char *path) |
| 680 | { |
| 681 | dInfo2<vfs> << "vfs_mkdir('" << path << "')" ; |
| 682 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
| 683 | if (base.isErr()) |
| 684 | return base.getErr(); |
| 685 | |
| 686 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST); |
| 687 | if (dentry.isErr()) |
| 688 | return dentry.getErr(); |
| 689 | |
| 690 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
| 691 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->mkdir == NULL) |
| 692 | { |
| 693 | // dentry does not have a mkdir operation |
| 694 | dentry_unref(dentry: dentry.get()); |
| 695 | return -ENOTSUP; |
| 696 | } |
| 697 | |
| 698 | // TODO: use umask or something else |
| 699 | const bool created = parent_dir->inode->ops->mkdir(parent_dir->inode, dentry.get(), parent_dir->inode->perm); |
| 700 | |
| 701 | if (!created) |
| 702 | mos_warn("failed to create directory '%s'" , path); |
| 703 | |
| 704 | dentry_unref(dentry: dentry.get()); |
| 705 | return created ? 0 : -EIO; |
| 706 | } |
| 707 | |
| 708 | PtrResult<void> vfs_rmdir(const char *path) |
| 709 | { |
| 710 | dInfo2<vfs> << "vfs_rmdir('" << path << "')" ; |
| 711 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD); |
| 712 | if (base.isErr()) |
| 713 | return base.getErr(); |
| 714 | |
| 715 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_DIR); |
| 716 | if (dentry.isErr()) |
| 717 | return dentry.getErr(); |
| 718 | |
| 719 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
| 720 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->rmdir == NULL) |
| 721 | { |
| 722 | dentry_unref(dentry: dentry.get()); |
| 723 | return -ENOTSUP; |
| 724 | } |
| 725 | |
| 726 | const bool removed = parent_dir->inode->ops->rmdir(parent_dir->inode, dentry.get()); |
| 727 | |
| 728 | if (!removed) |
| 729 | mos_warn("failed to remove directory '%s'" , path); |
| 730 | |
| 731 | dentry_unref(dentry: dentry.get()); |
| 732 | return removed ? 0 : -EIO; |
| 733 | } |
| 734 | |
| 735 | size_t vfs_list_dir(IO *io, void *user_buf, size_t user_size) |
| 736 | { |
| 737 | dInfo2<vfs> << "vfs_list_dir(io=" << (void *) io << ", buf=" << (void *) user_buf << ", size=" << user_size << ")" ; |
| 738 | FsBaseFile *file = static_cast<FsBaseFile *>(io); |
| 739 | if (unlikely(file->dentry->inode->type != FILE_TYPE_DIRECTORY)) |
| 740 | { |
| 741 | mos_warn("not a directory" ); |
| 742 | return 0; |
| 743 | } |
| 744 | |
| 745 | if (file->private_data == NULL) |
| 746 | { |
| 747 | vfs_listdir_state_t *const state = mos::create<vfs_listdir_state_t>(); |
| 748 | file->private_data = state; |
| 749 | linked_list_init(head_node: &state->entries); |
| 750 | state->n_count = state->read_offset = 0; |
| 751 | vfs_populate_listdir_buf(dir: file->dentry, state); |
| 752 | } |
| 753 | |
| 754 | vfs_listdir_state_t *const state = (vfs_listdir_state_t *) file->private_data; |
| 755 | |
| 756 | if (state->read_offset >= state->n_count) |
| 757 | return 0; // no more entries |
| 758 | |
| 759 | size_t bytes_copied = 0; |
| 760 | size_t i = 0; |
| 761 | list_foreach(vfs_listdir_entry_t, entry, state->entries) |
| 762 | { |
| 763 | if (i++ < state->read_offset) |
| 764 | continue; // skip the entries we have already read |
| 765 | |
| 766 | if (state->read_offset >= state->n_count) |
| 767 | break; |
| 768 | |
| 769 | const size_t entry_size = sizeof(ino_t) + sizeof(off_t) + sizeof(short) + sizeof(char) + entry->name.size() + 1; // +1 for the null terminator |
| 770 | if (bytes_copied + entry_size > user_size) |
| 771 | break; |
| 772 | |
| 773 | struct dirent *dirent = (struct dirent *) (((char *) user_buf) + bytes_copied); |
| 774 | dirent->d_ino = entry->ino; |
| 775 | dirent->d_type = entry->type; |
| 776 | dirent->d_reclen = entry_size; |
| 777 | dirent->d_off = entry_size - 1; |
| 778 | memcpy(dest: dirent->d_name, src: entry->name.data(), n: entry->name.size()); |
| 779 | dirent->d_name[entry->name.size()] = '\0'; |
| 780 | bytes_copied += entry_size; |
| 781 | state->read_offset++; |
| 782 | } |
| 783 | |
| 784 | return bytes_copied; |
| 785 | } |
| 786 | |
| 787 | long vfs_chdirat(fd_t dirfd, const char *path) |
| 788 | { |
| 789 | dInfo2<vfs> << "vfs_chdirat('" << dirfd << ", " << path << "')" ; |
| 790 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
| 791 | if (base.isErr()) |
| 792 | return base.getErr(); |
| 793 | |
| 794 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_DIR); |
| 795 | if (dentry.isErr()) |
| 796 | return dentry.getErr(); |
| 797 | |
| 798 | auto old_cwd = dentry_from_fd(AT_FDCWD); |
| 799 | if (old_cwd) |
| 800 | dentry_unref(dentry: old_cwd.get()); |
| 801 | |
| 802 | current_process->working_directory = dentry.get(); |
| 803 | return 0; |
| 804 | } |
| 805 | |
| 806 | ssize_t vfs_getcwd(char *buf, size_t size) |
| 807 | { |
| 808 | dInfo2<vfs> << "vfs_getcwd(buf=" << (void *) buf << ", size=" << size << ")" ; |
| 809 | auto cwd = dentry_from_fd(AT_FDCWD); |
| 810 | if (cwd.isErr()) |
| 811 | return cwd.getErr(); |
| 812 | |
| 813 | const auto path = dentry_path(dentry: cwd.get(), root: root_dentry); |
| 814 | if (!path) |
| 815 | return -ENOMEM; |
| 816 | |
| 817 | const size_t n = path->copy(buffer: buf, size); |
| 818 | |
| 819 | if (n != path->size()) |
| 820 | return -ERANGE; // buffer too small |
| 821 | |
| 822 | if (n >= size) |
| 823 | return -ERANGE; // buffer too small |
| 824 | |
| 825 | buf[n] = '\0'; // null-terminate the string |
| 826 | return n; |
| 827 | } |
| 828 | |
| 829 | long vfs_fchmodat(fd_t fd, const char *path, int perm, int flags) |
| 830 | { |
| 831 | dInfo2<vfs> << "vfs_fchmodat(fd=" << fd << ", path='" << path << "', perm=" << perm << ", flags=" << flags << ")" ; |
| 832 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd); |
| 833 | if (base.isErr()) |
| 834 | return base.getErr(); |
| 835 | |
| 836 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_ANY_TYPE); |
| 837 | if (dentry.isErr()) |
| 838 | return dentry.getErr(); |
| 839 | |
| 840 | // TODO: check if the underlying filesystem supports chmod, and is not read-only |
| 841 | dentry->inode->perm = perm; |
| 842 | dentry_unref(dentry: dentry.get()); |
| 843 | return 0; |
| 844 | } |
| 845 | |
| 846 | long vfs_unlinkat(fd_t dirfd, const char *path) |
| 847 | { |
| 848 | dInfo2<vfs> << "vfs_unlinkat(dirfd=" << dirfd << ", path='" << path << "')" ; |
| 849 | auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd); |
| 850 | if (base.isErr()) |
| 851 | return base.getErr(); |
| 852 | |
| 853 | auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST | RESOLVE_EXPECT_FILE | RESOLVE_SYMLINK_NOFOLLOW); |
| 854 | if (dentry.isErr()) |
| 855 | return dentry.getErr(); |
| 856 | |
| 857 | dentry_t *parent_dir = dentry_parent(dentry: *dentry); |
| 858 | if (parent_dir->inode == NULL || parent_dir->inode->ops == NULL || parent_dir->inode->ops->unlink == NULL) |
| 859 | { |
| 860 | dentry_unref(dentry: dentry.get()); |
| 861 | return -ENOTSUP; |
| 862 | } |
| 863 | |
| 864 | if (!inode_unlink(dir: parent_dir->inode, dentry: dentry.get())) |
| 865 | { |
| 866 | dentry_unref(dentry: dentry.get()); |
| 867 | return -EIO; |
| 868 | } |
| 869 | |
| 870 | dentry_unref(dentry: dentry.get()); // it won't release dentry because dentry->inode is still valid |
| 871 | dentry_detach(dentry: dentry.get()); |
| 872 | dentry_try_release(dentry: dentry.get()); |
| 873 | return 0; |
| 874 | } |
| 875 | |
| 876 | long vfs_fsync(IO *io, bool sync_metadata, off_t start, off_t end) |
| 877 | { |
| 878 | dInfo2<vfs> << "vfs_fsync(io=" << (void *) io << ", sync_metadata=" << sync_metadata << ", start=" << start << ", end=" << end << ")" ; |
| 879 | FsBaseFile *file = static_cast<FsBaseFile *>(io); |
| 880 | |
| 881 | const off_t nbytes = end - start; |
| 882 | const off_t npages = ALIGN_UP_TO_PAGE(nbytes) / MOS_PAGE_SIZE; |
| 883 | const off_t pgoffset = start / MOS_PAGE_SIZE; |
| 884 | |
| 885 | long ret = do_pagecache_flush(file, pgoff: pgoffset, npages); |
| 886 | if (ret < 0) |
| 887 | return ret; |
| 888 | |
| 889 | if (sync_metadata) |
| 890 | { |
| 891 | ret = do_sync_inode(file); |
| 892 | if (ret < 0) |
| 893 | return ret; |
| 894 | } |
| 895 | |
| 896 | return ret; |
| 897 | } |
| 898 | |
| 899 | // ! sysfs support |
| 900 | |
| 901 | static bool vfs_sysfs_filesystems(sysfs_file_t *f) |
| 902 | { |
| 903 | list_foreach(filesystem_t, fs, vfs_fs_list) |
| 904 | { |
| 905 | sysfs_printf(file: f, fmt: "%s\n" , fs->name.c_str()); |
| 906 | } |
| 907 | |
| 908 | return true; |
| 909 | } |
| 910 | |
| 911 | static bool vfs_sysfs_mountpoints(sysfs_file_t *f) |
| 912 | { |
| 913 | list_foreach(mount_t, mp, vfs_mountpoint_list) |
| 914 | { |
| 915 | const auto str = dentry_path(dentry: mp->mountpoint, root: root_dentry); |
| 916 | if (str) |
| 917 | sysfs_printf(file: f, fmt: "%-20s %-10s\n" , str->c_str(), mp->fs->name.c_str()); |
| 918 | else |
| 919 | sysfs_printf(file: f, fmt: "%-20s %-10s\n" , "<error>" , mp->fs->name.c_str()); |
| 920 | } |
| 921 | |
| 922 | return true; |
| 923 | } |
| 924 | |
| 925 | static void vfs_sysfs_dentry_stats_stat_receiver(int depth, const dentry_t *dentry, bool mountroot, void *data) |
| 926 | { |
| 927 | sysfs_file_t *file = (sysfs_file_t *) data; |
| 928 | sysfs_printf(file, fmt: "%*s%s: refcount=%zu%s\n" , // |
| 929 | depth * 4, // |
| 930 | "" , // |
| 931 | dentry_name(dentry).c_str(), // |
| 932 | dentry->refcount.load(), // |
| 933 | mountroot ? " (mount root)" : (dentry->is_mountpoint ? " (mountpoint)" : "" ) // |
| 934 | ); |
| 935 | } |
| 936 | |
| 937 | static bool vfs_sysfs_dentry_stats(sysfs_file_t *f) |
| 938 | { |
| 939 | dentry_dump_refstat(dentry: root_dentry, receiver: vfs_sysfs_dentry_stats_stat_receiver, data: f); |
| 940 | return true; |
| 941 | } |
| 942 | |
| 943 | static sysfs_item_t vfs_sysfs_items[] = { |
| 944 | SYSFS_RO_ITEM("filesystems" , vfs_sysfs_filesystems), |
| 945 | SYSFS_RO_ITEM("mount" , vfs_sysfs_mountpoints), |
| 946 | SYSFS_RO_ITEM("dentry_stats" , vfs_sysfs_dentry_stats), |
| 947 | }; |
| 948 | |
| 949 | SYSFS_AUTOREGISTER(vfs, vfs_sysfs_items); |
| 950 | |