vfs.cpp source code [MOS/kernel/filesystem/vfs.cpp]

1	// SPDX-License-Identifier: GPL-3.0-or-later
2
3	#include "mos/assert.hpp"
4	#include "mos/device/timer.hpp"
5	#include "mos/filesystem/inode.hpp"
6	#include "mos/filesystem/mount.hpp"
7	#include "mos/filesystem/page_cache.hpp"
8	#include "mos/filesystem/sysfs/sysfs.hpp"
9	#include "mos/filesystem/sysfs/sysfs_autoinit.hpp"
10	#include "mos/mm/mm.hpp"
11	#include "mos/mm/mmstat.hpp"
12
13	#include <algorithm>
14	#include <dirent.h>
15	#include <errno.h>
16	#include <mos/filesystem/dentry.hpp>
17	#include <mos/filesystem/fs_types.h>
18	#include <mos/filesystem/vfs.hpp>
19	#include <mos/filesystem/vfs_types.hpp>
20	#include <mos/io/io.hpp>
21	#include <mos/lib/structures/list.hpp>
22	#include <mos/lib/structures/tree.hpp>
23	#include <mos/lib/sync/spinlock.hpp>
24	#include <mos/mos_global.h>
25	#include <mos/platform/platform.hpp>
26	#include <mos/syslog/printk.hpp>
27	#include <mos/tasks/process.hpp>
28	#include <mos/types.hpp>
29	#include <mos_stdlib.hpp>
30	#include <mos_string.hpp>
31
32	static list_head vfs_fs_list; // filesystem_t
33	static spinlock_t vfs_fs_list_lock;
34
35	dentry_t *root_dentry = NULL;
36
37	static long do_pagecache_flush(file_t *file, off_t pgoff, size_t npages)
38	{
39	pr_dinfo2(vfs, "vfs: flushing page cache for file %pio", (void *) &file->io);
40
41	mutex_acquire(mutex: &file->dentry->inode->cache.lock);
42	long ret = `0`;
43	if (pgoff == `0` && npages == (size_t) -`1`)
44	ret = pagecache_flush_or_drop_all(icache: &file->dentry->inode->cache, drop_page: false);
45	else
46	ret = pagecache_flush_or_drop(icache: &file->dentry->inode->cache, pgoff, npages, drop_page: false);
47
48	mutex_release(mutex: &file->dentry->inode->cache.lock);
49	return ret;
50	}
51
52	static long do_sync_inode(file_t *file)
53	{
54	const superblock_ops_t *ops = file->dentry->inode->superblock->ops;
55	if (ops && ops->sync_inode)
56	return ops->sync_inode(file->dentry->inode);
57
58	return `0`;
59	}
60
61	// BEGIN: filesystem's io_t operations
62	static void vfs_io_ops_close(io_t *io)
63	{
64	file_t *file = container_of(io, file_t, io);
65	if (io->type == IO_FILE && io->flags & IO_WRITABLE) // only flush if the file is writable
66	{
67	do_pagecache_flush(file, pgoff: `0`, npages: (off_t) -`1`);
68	do_sync_inode(file);
69	}
70
71	dentry_unref(dentry: file->dentry);
72
73	if (io->type == IO_FILE)
74	{
75	const file_ops_t *file_ops = file_get_ops(file);
76	if (file_ops)
77	{
78	if (file_ops->release)
79	file_ops->release(file);
80	}
81	}
82
83	delete file;
84	}
85
86	static void vfs_io_ops_close_dir(io_t *io)
87	{
88	file_t *file = container_of(io, file_t, io);
89
90	if (file->private_data)
91	{
92	vfs_listdir_state_t state = static_cast<vfs_listdir_state_t >(file->private_data);
93	list_foreach(vfs_listdir_entry_t, entry, state->entries)
94	{
95	list_remove(entry);
96	delete entry;
97	}
98
99	delete state;
100	file->private_data = NULL;
101	}
102
103	vfs_io_ops_close(io); // close the file
104	}
105
106	static size_t vfs_io_ops_read(io_t io, void* *buf, size_t count)
107	{
108	file_t *file = container_of(io, file_t, io);
109	const file_ops_t *const file_ops = file_get_ops(file);
110	if (!file_ops \|\| !file_ops->read)
111	return `0`;
112
113	spinlock_acquire(&file->offset_lock);
114	size_t ret = file_ops->read(file, buf, count, file->offset);
115	if (IS_ERR_VALUE(ret))
116	; // do nothing
117	else if (ret != (size_t) -`1`)
118	file->offset += ret;
119	spinlock_release(&file->offset_lock);
120
121	return ret;
122	}
123
124	static size_t vfs_io_ops_write(io_t io, const* void *buf, size_t count)
125	{
126	file_t *file = container_of(io, file_t, io);
127	const file_ops_t *const file_ops = file_get_ops(file);
128	if (!file_ops \|\| !file_ops->write)
129	return `0`;
130
131	spinlock_acquire(&file->offset_lock);
132	size_t ret = file_ops->write(file, buf, count, file->offset);
133	if (!IS_ERR_VALUE(ret))
134	file->offset += ret;
135	spinlock_release(&file->offset_lock);
136	return ret;
137	}
138
139	static off_t vfs_io_ops_seek(io_t *io, off_t offset, io_seek_whence_t whence)
140	{
141	file_t *file = container_of(io, file_t, io);
142
143	const file_ops_t *const ops = file_get_ops(file);
144	if (ops->seek)
145	return ops->seek(file, offset, whence); // use the filesystem's lseek if it exists
146
147	spinlock_acquire(&file->offset_lock);
148
149	switch (whence)
150	{
151	case IO_SEEK_SET:
152	{
153	file->offset = std::max(a: offset, b: `0l`);
154	break;
155	}
156	case IO_SEEK_CURRENT:
157	{
158	off_t new_offset = file->offset + offset;
159	new_offset = std::max(a: new_offset, b: `0l`);
160	file->offset = new_offset;
161	break;
162	}
163	case IO_SEEK_END:
164	{
165	off_t new_offset = file->dentry->inode->size + offset;
166	new_offset = std::max(a: new_offset, b: `0l`);
167	file->offset = new_offset;
168	break;
169	}
170	case IO_SEEK_DATA: mos_warn("vfs: IO_SEEK_DATA is not supported"); break;
171	case IO_SEEK_HOLE: mos_warn("vfs: IO_SEEK_HOLE is not supported"); break;
172	};
173
174	spinlock_release(&file->offset_lock);
175	return file->offset;
176	}
177
178	static vmfault_result_t vfs_fault_handler(vmap_t vmap, ptr_t fault_addr, pagefault_t info)
179	{
180	MOS_ASSERT(vmap->io);
181	file_t *file = container_of(vmap->io, file_t, io);
182	const size_t fault_pgoffset = (vmap->io_offset + ALIGN_DOWN_TO_PAGE(fault_addr) - vmap->vaddr) / MOS_PAGE_SIZE;
183
184	mutex_acquire(mutex: &file->dentry->inode->cache.lock); // lock the inode cache
185	auto pagecache_page = pagecache_get_page_for_read(cache: &file->dentry->inode->cache, pgoff: fault_pgoffset);
186	mutex_release(mutex: &file->dentry->inode->cache.lock);
187
188	if (pagecache_page.isErr())
189	return VMFAULT_CANNOT_HANDLE;
190
191	// ! mm subsystem has verified that this vmap can be written to, but in the page table it's marked as read-only
192	// currently, only CoW pages have this property, we treat this as a CoW page*
193	if (info->is_present && info->is_write)
194	{
195	if (pagecache_page == info->faulting_page)
196	vmap_stat_dec(vmap, pagecache); // the faulting page is a pagecache page
197	else
198	vmap_stat_dec(vmap, cow); // the faulting page is a COW page
199	vmap_stat_inc(vmap, regular);
200	return mm_resolve_cow_fault(vmap, fault_addr, info); // resolve by copying data page into prevate page
201	}
202
203	info->backing_page = pagecache_page.get();
204	if (vmap->type == VMAP_TYPE_PRIVATE)
205	{
206	if (info->is_write)
207	{
208	vmap_stat_inc(vmap, regular);
209	// present pages are handled above
210	MOS_ASSERT(!info->is_present);
211	return VMFAULT_COPY_BACKING_PAGE; // copy and (also) map the backing page
212	}
213	else
214	{
215	vmap_stat_inc(vmap, pagecache);
216	vmap_stat_inc(vmap, cow);
217	return VMFAULT_MAP_BACKING_PAGE_RO;
218	}
219	}
220	else
221	{
222	vmap_stat_inc(vmap, pagecache);
223	vmap_stat_inc(vmap, regular);
224	return VMFAULT_MAP_BACKING_PAGE;
225	}
226	}
227
228	static bool vfs_io_ops_mmap(io_t io, vmap_t vmap, off_t offset)
229	{
230	file_t *file = container_of(io, file_t, io);
231	const file_ops_t *const file_ops = file_get_ops(file);
232
233	MOS_ASSERT(!vmap->on_fault); // there should be no fault handler set
234	vmap->on_fault = vfs_fault_handler;
235
236	if (file_ops->mmap)
237	return file_ops->mmap(file, vmap, offset);
238
239	return true;
240	}
241
242	static bool vfs_io_ops_munmap(io_t io, vmap_t vmap, bool *unmapped)
243	{
244	file_t *file = container_of(io, file_t, io);
245	const file_ops_t *const file_ops = file_get_ops(file);
246
247	if (file_ops->munmap)
248	return file_ops->munmap(file, vmap, unmapped);
249
250	return true;
251	}
252
253	static void vfs_io_ops_getname(const io_t io, char* *buf, size_t size)
254	{
255	const file_t *file = container_of(io, file_t, io);
256	dentry_path(dentry: file->dentry, root: root_dentry, buf, size);
257	}
258
259	static const io_op_t file_io_ops = {
260	.read = vfs_io_ops_read,
261	.write = vfs_io_ops_write,
262	.close = vfs_io_ops_close,
263	.seek = vfs_io_ops_seek,
264	.mmap = vfs_io_ops_mmap,
265	.munmap = vfs_io_ops_munmap,
266	.get_name = vfs_io_ops_getname,
267	};
268
269	static const io_op_t dir_io_ops = {
270	.read = vfs_list_dir,
271	.close = vfs_io_ops_close_dir,
272	.get_name = vfs_io_ops_getname,
273	};
274
275	// END: filesystem's io_t operations
276
277	static __used void vfs_flusher_entry(void *arg)
278	{
279	MOS_UNUSED(arg);
280	while (true)
281	{
282	timer_msleep(ms: `10` * `1000`);
283	// pagecache_flush_all();
284	}
285	}
286
287	static void vfs_flusher_init(void)
288	{
289	// kthread_create(vfs_flusher_entry, NULL, "vfs_flusher");
290	}
291	MOS_INIT(KTHREAD, vfs_flusher_init);
292
293	static void vfs_copy_stat(file_stat_t statbuf, inode_t inode)
294	{
295	statbuf->ino = inode->ino;
296	statbuf->type = inode->type;
297	statbuf->perm = inode->perm;
298	statbuf->size = inode->size;
299	statbuf->uid = inode->uid;
300	statbuf->gid = inode->gid;
301	statbuf->sticky = inode->sticky;
302	statbuf->suid = inode->suid;
303	statbuf->sgid = inode->sgid;
304	statbuf->nlinks = inode->nlinks;
305	statbuf->accessed = inode->accessed;
306	statbuf->modified = inode->modified;
307	statbuf->created = inode->created;
308	}
309
310	static filesystem_t *vfs_find_filesystem(mos::string_view name)
311	{
312	SpinLocker lock(&vfs_fs_list_lock);
313	list_foreach(filesystem_t, fs, vfs_fs_list)
314	{
315	if (fs->name == name)
316	return fs;
317	}
318
319	return nullptr;
320	}
321
322	static bool vfs_verify_permissions(dentry_t &file_dentry, bool open, bool read, bool create, bool execute, bool write)
323	{
324	MOS_ASSERT(file_dentry.inode);
325	const file_perm_t file_perm = file_dentry.inode->perm;
326
327	// TODO: we are treating all users as root for now, only checks for execute permission
328	MOS_UNUSED(open);
329	MOS_UNUSED(read);
330	MOS_UNUSED(create);
331	MOS_UNUSED(write);
332
333	if (execute && !(file_perm & PERM_EXEC))
334	return false; // execute permission denied
335
336	return true;
337	}
338
339	static PtrResult<file_t> vfs_do_open(dentry_t base, const* char *path, open_flags flags)
340	{
341	if (base == NULL)
342	return -EINVAL;
343
344	const bool may_create = flags & OPEN_CREATE;
345	const bool read = flags & OPEN_READ;
346	const bool write = flags & OPEN_WRITE;
347	const bool exec = flags & OPEN_EXECUTE;
348	const bool no_follow = flags & OPEN_NO_FOLLOW;
349	const bool expect_dir = flags & OPEN_DIR;
350	const bool truncate = flags & OPEN_TRUNCATE;
351
352	lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_FILE;
353	if (no_follow)
354	resolve_flags \|= RESOLVE_SYMLINK_NOFOLLOW;
355	if (may_create)
356	resolve_flags \|= RESOLVE_EXPECT_ANY_EXIST;
357	if (expect_dir)
358	resolve_flags \|= RESOLVE_EXPECT_DIR;
359
360	auto entry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: resolve_flags);
361	if (entry.isErr())
362	{
363	pr_dinfo2(vfs, "failed to resolve '%s': create=%d, r=%d, x=%d, nofollow=%d, dir=%d, truncate=%d", path, may_create, read, exec, no_follow, expect_dir, truncate);
364	return entry.getErr();
365	}
366
367	bool created = false;
368
369	if (may_create && entry ->inode == NULL)
370	{
371	auto parent = dentry_parent(dentry: *entry);
372	if (!parent->inode->ops->newfile)
373	{
374	dentry_unref(dentry: entry.get());
375	return -EROFS;
376	}
377
378	if (!parent->inode->ops->newfile(parent->inode, entry.get(), FILE_TYPE_REGULAR, `0666`))
379	{
380	dentry_unref(dentry: entry.get());
381	return -EIO; // failed to create file
382	}
383
384	created = true;
385	}
386
387	if (!vfs_verify_permissions(file_dentry&: entry, open: true*, read, create: may_create, execute: exec, write))
388	{
389	dentry_unref(dentry: entry.get());
390	return -EACCES;
391	}
392
393	auto file = vfs_do_open_dentry(entry: entry.get(), created, read, write, exec, truncate);
394	if (file.isErr())
395	{
396	dentry_unref(dentry: entry.get());
397	return file.getErr();
398	}
399
400	return file;
401	}
402
403	// public functions
404	PtrResult<file_t> vfs_do_open_dentry(dentry_t entry, bool* created, bool read, bool write, bool exec, bool truncate)
405	{
406	MOS_ASSERT(entry->inode);
407	MOS_UNUSED(truncate);
408
409	file_t *file = mos::create<file_t>();
410	file->dentry = entry;
411
412	io_flags_t io_flags = IO_SEEKABLE;
413
414	if (read)
415	io_flags \|= IO_READABLE;
416
417	if (write)
418	io_flags \|= IO_WRITABLE;
419
420	if (exec)
421	io_flags \|= IO_EXECUTABLE;
422
423	// only regular files are mmapable
424	if (entry->inode->type == FILE_TYPE_REGULAR)
425	io_flags \|= IO_MMAPABLE;
426
427	if (file->dentry->inode->type == FILE_TYPE_DIRECTORY)
428	io_init(io: &file->io, type: IO_DIR, flags: (io_flags \| IO_READABLE) & ~IO_SEEKABLE, ops: &dir_io_ops);
429	else
430	io_init(io: &file->io, type: IO_FILE, flags: io_flags, ops: &file_io_ops);
431
432	const file_ops_t *ops = file_get_ops(file);
433	if (ops && ops->open)
434	{
435	bool opened = ops->open(file->dentry->inode, file, created);
436	if (!opened)
437	{
438	delete file;
439	return -ENOTSUP;
440	}
441	}
442
443	return file;
444	}
445
446	void vfs_register_filesystem(filesystem_t *fs)
447	{
448	if (vfs_find_filesystem(name: fs->name))
449	mos_panic("filesystem '%s' already registered", fs->name.c_str());
450
451	MOS_ASSERT(list_is_empty(list_node(fs)));
452
453	spinlock_acquire(&vfs_fs_list_lock);
454	list_node_append(head: &vfs_fs_list, list_node(fs));
455	spinlock_release(&vfs_fs_list_lock);
456
457	pr_dinfo2(vfs, "filesystem '%s' registered", fs->name.c_str());
458	}
459
460	long vfs_mount(const char device, const* char path, const* char fs, const* char *options)
461	{
462	filesystem_t *real_fs = vfs_find_filesystem(name: fs);
463	if (unlikely(real_fs == NULL))
464	{
465	mos_warn("filesystem '%s' not found", fs);
466	return -EINVAL;
467	}
468
469	MOS_ASSERT_X(real_fs->mount, "filesystem '%s' does not support mounting", real_fs->name.c_str());
470
471	if (unlikely(strcmp(path, "/") == `0`))
472	{
473	// special case: mount root filesystem
474	if (root_dentry)
475	{
476	pr_warn("root filesystem is already mounted");
477	return -EBUSY;
478	}
479	pr_dinfo2(vfs, "mounting root filesystem '%s'...", fs);
480	const auto mountResult = real_fs->mount(real_fs, device, options);
481	if (mountResult.isErr())
482	{
483	mos_warn("failed to mount root filesystem");
484	return -EIO;
485	}
486	else
487	{
488	root_dentry = mountResult.get();
489	}
490
491	pr_dinfo2(vfs, "root filesystem mounted, dentry=%p", (void *) root_dentry);
492
493	MOS_ASSERT(root_dentry->name.empty());
494	bool mounted = dentry_mount(mountpoint: root_dentry, root: root_dentry, fs: real_fs);
495	MOS_ASSERT(mounted);
496
497	return `0`;
498	}
499
500	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
501	if (base.isErr())
502	return base.getErr();
503
504	auto mpRoot = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR \| RESOLVE_EXPECT_EXIST);
505	if (mpRoot.isErr())
506	return mpRoot.getErr();
507
508	if (mpRoot ->is_mountpoint)
509	{
510	// we don't support overlaying filesystems yet
511	mos_warn("mount point is already mounted");
512	dentry_unref(dentry: mpRoot.get());
513	return -ENOTSUP;
514	}
515
516	// when mounting:
517	// mounted_root will have a reference of 1
518	// the mount_point will have its reference incremented by 1
519	auto mounted_root = real_fs->mount(real_fs, device, options);
520	if (mounted_root.isErr())
521	{
522	mos_warn("failed to mount filesystem");
523	return mounted_root.getErr();
524	}
525
526	const bool mounted = dentry_mount(mountpoint: mpRoot.get(), root: mounted_root.get(), fs: real_fs);
527	if (unlikely(!mounted))
528	{
529	mos_warn("failed to mount filesystem");
530	return -EIO;
531	}
532
533	MOS_ASSERT_X(mpRoot ->refcount == mounted_root ->refcount, "mountpoint refcount=%zu, mounted_root refcount=%zu", mpRoot ->refcount.load(),
534	mounted_root ->refcount.load());
535	pr_dinfo2(vfs, "mounted filesystem '%s' on '%s'", fs, path);
536	return `0`;
537	}
538
539	long vfs_unmount(const char *path)
540	{
541	auto mounted_root = dentry_resolve(starting_dir: root_dentry, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR \| RESOLVE_EXPECT_EXIST);
542	if (mounted_root.isErr())
543	return mounted_root.getErr();
544
545	// the mounted root itself holds a ref, and the caller of this function
546	if (mounted_root ->refcount != `2`)
547	{
548	dentry_check_refstat(dentry: mounted_root.get());
549	mos_warn("refcount is not as expected");
550	return -EBUSY;
551	}
552
553	dentry_unref(dentry: mounted_root.get()); // release the reference held by this function
554
555	// unmounting root filesystem
556	auto mountpoint = dentry_unmount(root: mounted_root.get());
557	if (!mountpoint)
558	{
559	mos_warn("failed to unmount filesystem");
560	return -EIO;
561	}
562
563	MOS_ASSERT(mounted_root ->refcount == mountpoint->refcount && mountpoint->refcount == `1`);
564	if (mounted_root ->superblock->fs->unmount)
565	mounted_root ->superblock->fs->unmount(mounted_root ->superblock->fs, mounted_root.get());
566	else
567	MOS_ASSERT(dentry_unref_one_norelease(mounted_root.get()));
568	MOS_ASSERT_X(mounted_root ->refcount == `0`, "fs->umount should release the last reference to the mounted root");
569
570	if (mounted_root == root_dentry)
571	{
572	pr_info2("unmounted root filesystem");
573	root_dentry = NULL;
574	return `0`;
575	}
576
577	dentry_unref(dentry: mountpoint);
578	return `0`;
579	}
580
581	PtrResult<file_t> vfs_openat(int fd, const char *path, open_flags flags)
582	{
583	pr_dinfo2(vfs, "vfs_openat(fd=%d, path='%s', flags=%x)", fd, path, flags);
584	auto basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd);
585	if (basedir.isErr())
586	return basedir.getErr();
587
588	auto file = vfs_do_open(base: basedir.get(), path, flags);
589	return file;
590	}
591
592	long vfs_fstatat(fd_t fd, const char path, file_stat_t __restrict statbuf, fstatat_flags flags)
593	{
594	if (flags & FSTATAT_FILE)
595	{
596	pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%p', stat=%p, flags=%x)", fd, (void ) path, (void* *) statbuf, flags);
597	io_t *io = process_get_fd(current_process, fd);
598	if (!(io_valid(io) && (io->type == IO_FILE \|\| io->type == IO_DIR)))
599	return -EBADF; // io is closed, or is not a file or directory
600
601	file_t *file = container_of(io, file_t, io);
602	MOS_ASSERT(file);
603	if (statbuf)
604	vfs_copy_stat(statbuf, inode: file->dentry->inode);
605
606	return `0`;
607	}
608
609	pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%s', stat=%p, flags=%x)", fd, path, (void *) statbuf, flags);
610	auto basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd);
611	if (basedir.isErr())
612	return basedir.getErr();
613
614	lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_ANY_TYPE \| RESOLVE_EXPECT_EXIST;
615	if (flags & FSTATAT_NOFOLLOW)
616	resolve_flags \|= RESOLVE_SYMLINK_NOFOLLOW;
617
618	auto dentry = dentry_resolve(starting_dir: basedir.get(), root_dir: root_dentry, path, flags: resolve_flags);
619	if (dentry.isErr())
620	return dentry.getErr();
621
622	if (statbuf)
623	vfs_copy_stat(statbuf, inode: dentry ->inode);
624	dentry_unref(dentry: dentry.get());
625	return `0`;
626	}
627
628	size_t vfs_readlinkat(fd_t dirfd, const char path, char* *buf, size_t size)
629	{
630	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd);
631	if (base.isErr())
632	return base.getErr();
633
634	auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_SYMLINK_NOFOLLOW \| RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_FILE);
635	if (dentry.isErr())
636	return dentry.getErr();
637
638	if (dentry ->inode->type != FILE_TYPE_SYMLINK)
639	{
640	dentry_unref(dentry: dentry.get());
641	return -EINVAL;
642	}
643
644	const size_t len = dentry ->inode->ops->readlink(dentry.get(), buf, size);
645
646	dentry_unref(dentry: dentry.get());
647
648	if (len >= size) // buffer too small
649	return -ENAMETOOLONG;
650
651	return len;
652	}
653
654	long vfs_symlink(const char path, const* char *target)
655	{
656	pr_dinfo2(vfs, "vfs_symlink(path='%s', target='%s')", path, target);
657	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
658	if (base.isErr())
659	return base.getErr();
660
661	auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST);
662	if (dentry.isErr())
663	return dentry.getErr();
664
665	dentry_t parent_dir = dentry_parent(dentry: dentry);
666	const bool created = parent_dir->inode->ops->symlink(parent_dir->inode, dentry.get(), target);
667
668	if (!created)
669	mos_warn("failed to create symlink '%s'", path);
670
671	dentry_unref(dentry: dentry.get());
672	return created ? `0` : -EIO;
673	}
674
675	long vfs_mkdir(const char *path)
676	{
677	pr_dinfo2(vfs, "vfs_mkdir('%s')", path);
678	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
679	if (base.isErr())
680	return base.getErr();
681
682	auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST);
683	if (dentry.isErr())
684	return dentry.getErr();
685
686	dentry_t parent_dir = dentry_parent(dentry: dentry);
687	if (parent_dir->inode == NULL \|\| parent_dir->inode->ops == NULL \|\| parent_dir->inode->ops->mkdir == NULL)
688	{
689	dentry_unref(dentry: dentry.get());
690	return false;
691	}
692
693	// TODO: use umask or something else
694	const bool created = parent_dir->inode->ops->mkdir(parent_dir->inode, dentry.get(), parent_dir->inode->perm);
695
696	if (!created)
697	mos_warn("failed to create directory '%s'", path);
698
699	dentry_unref(dentry: dentry.get());
700	return created ? `0` : -EIO;
701	}
702
703	long vfs_rmdir(const char *path)
704	{
705	pr_dinfo2(vfs, "vfs_rmdir('%s')", path);
706	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
707	if (base.isErr())
708	return base.getErr();
709
710	auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_DIR);
711	if (dentry.isErr())
712	return dentry.getErr();
713
714	dentry_t parent_dir = dentry_parent(dentry: dentry);
715	if (parent_dir->inode == NULL \|\| parent_dir->inode->ops == NULL \|\| parent_dir->inode->ops->rmdir == NULL)
716	{
717	dentry_unref(dentry: dentry.get());
718	return -ENOTSUP;
719	}
720
721	const bool removed = parent_dir->inode->ops->rmdir(parent_dir->inode, dentry.get());
722
723	if (!removed)
724	mos_warn("failed to remove directory '%s'", path);
725
726	dentry_unref(dentry: dentry.get());
727	return removed ? `0` : -EIO;
728	}
729
730	size_t vfs_list_dir(io_t io, void* *user_buf, size_t user_size)
731	{
732	pr_dinfo2(vfs, "vfs_list_dir(io=%p, buf=%p, size=%zu)", (void ) io, (void* *) user_buf, user_size);
733	file_t *file = container_of(io, file_t, io);
734	if (unlikely(file->dentry->inode->type != FILE_TYPE_DIRECTORY))
735	{
736	mos_warn("not a directory");
737	return `0`;
738	}
739
740	if (file->private_data == NULL)
741	{
742	vfs_listdir_state_t *const state = mos::create<vfs_listdir_state_t>();
743	file->private_data = state;
744	linked_list_init(head_node: &state->entries);
745	state->n_count = state->read_offset = `0`;
746	vfs_populate_listdir_buf(dir: file->dentry, state);
747	}
748
749	vfs_listdir_state_t *const state = (vfs_listdir_state_t *) file->private_data;
750
751	if (state->read_offset >= state->n_count)
752	return `0`; // no more entries
753
754	size_t bytes_copied = `0`;
755	size_t i = `0`;
756	list_foreach(vfs_listdir_entry_t, entry, state->entries)
757	{
758	if (i++ < state->read_offset)
759	continue; // skip the entries we have already read
760
761	if (state->read_offset >= state->n_count)
762	break;
763
764	const size_t entry_size = sizeof(ino_t) + sizeof(off_t) + sizeof(short) + sizeof(char) + entry->name.size() + `1`; // +1 for the null terminator
765	if (bytes_copied + entry_size > user_size)
766	break;
767
768	struct dirent dirent = (struct* dirent ) (((char* *) user_buf) + bytes_copied);
769	dirent->d_ino = entry->ino;
770	dirent->d_type = entry->type;
771	dirent->d_reclen = entry_size;
772	dirent->d_off = entry_size - `1`;
773	memcpy(dest: dirent->d_name, src: entry->name.data(), n: entry->name.size());
774	dirent->d_name[entry->name.size()] = `'\0'`;
775	bytes_copied += entry_size;
776	state->read_offset++;
777	}
778
779	return bytes_copied;
780	}
781
782	long vfs_chdirat(fd_t dirfd, const char *path)
783	{
784	pr_dinfo2(vfs, "vfs_chdir('%s')", path);
785	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd);
786	if (base.isErr())
787	return base.getErr();
788
789	auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_DIR);
790	if (dentry.isErr())
791	return dentry.getErr();
792
793	auto old_cwd = dentry_from_fd(AT_FDCWD);
794	if (old_cwd)
795	dentry_unref(dentry: old_cwd.get());
796
797	current_process->working_directory = dentry.get();
798	return `0`;
799	}
800
801	ssize_t vfs_getcwd(char *buf, size_t size)
802	{
803	auto cwd = dentry_from_fd(AT_FDCWD);
804	if (cwd.isErr())
805	return cwd.getErr();
806
807	return dentry_path(dentry: cwd.get(), root: root_dentry, buf, size);
808	}
809
810	long vfs_fchmodat(fd_t fd, const char path, int* perm, int flags)
811	{
812	pr_dinfo2(vfs, "vfs_fchmodat(fd=%d, path='%s', perm=%o, flags=%x)", fd, path, perm, flags);
813	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd);
814	if (base.isErr())
815	return base.getErr();
816
817	auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_ANY_TYPE);
818	if (dentry.isErr())
819	return dentry.getErr();
820
821	// TODO: check if the underlying filesystem supports chmod, and is not read-only
822	dentry ->inode->perm = perm;
823	dentry_unref(dentry: dentry.get());
824	return `0`;
825	}
826
827	long vfs_unlinkat(fd_t dirfd, const char *path)
828	{
829	pr_dinfo2(vfs, "vfs_unlinkat(dirfd=%d, path='%s')", dirfd, path);
830	auto base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd);
831	if (base.isErr())
832	return base.getErr();
833
834	auto dentry = dentry_resolve(starting_dir: base.get(), root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_FILE \| RESOLVE_SYMLINK_NOFOLLOW);
835	if (dentry.isErr())
836	return dentry.getErr();
837
838	dentry_t parent_dir = dentry_parent(dentry: dentry);
839	if (parent_dir->inode == NULL \|\| parent_dir->inode->ops == NULL \|\| parent_dir->inode->ops->unlink == NULL)
840	{
841	dentry_unref(dentry: dentry.get());
842	return -ENOTSUP;
843	}
844
845	if (!inode_unlink(dir: parent_dir->inode, dentry: dentry.get()))
846	{
847	dentry_unref(dentry: dentry.get());
848	return -EIO;
849	}
850
851	dentry_unref(dentry: dentry.get()); // it won't release dentry because dentry->inode is still valid
852	dentry_detach(dentry: dentry.get());
853	dentry_try_release(dentry: dentry.get());
854	return `0`;
855	}
856
857	long vfs_fsync(io_t io, bool* sync_metadata, off_t start, off_t end)
858	{
859	pr_dinfo2(vfs, "vfs_fsync(io=%p, sync_metadata=%d, start=%ld, end=%ld)", (void *) io, sync_metadata, start, end);
860	file_t *file = container_of(io, file_t, io);
861
862	const off_t nbytes = end - start;
863	const off_t npages = ALIGN_UP_TO_PAGE(nbytes) / MOS_PAGE_SIZE;
864	const off_t pgoffset = start / MOS_PAGE_SIZE;
865
866	long ret = do_pagecache_flush(file, pgoff: pgoffset, npages);
867	if (ret < `0`)
868	return ret;
869
870	if (sync_metadata)
871	{
872	ret = do_sync_inode(file);
873	if (ret < `0`)
874	return ret;
875	}
876
877	return ret;
878	}
879
880	// ! sysfs support
881
882	static bool vfs_sysfs_filesystems(sysfs_file_t *f)
883	{
884	list_foreach(filesystem_t, fs, vfs_fs_list)
885	{
886	sysfs_printf(file: f, fmt: "%s\n", fs->name.c_str());
887	}
888
889	return true;
890	}
891
892	static bool vfs_sysfs_mountpoints(sysfs_file_t *f)
893	{
894	char pathbuf[MOS_PATH_MAX_LENGTH];
895	list_foreach(mount_t, mp, vfs_mountpoint_list)
896	{
897	dentry_path(dentry: mp->mountpoint, root: root_dentry, buf: pathbuf, size: sizeof(pathbuf));
898	sysfs_printf(file: f, fmt: "%-20s %-10s\n", pathbuf, mp->fs->name.c_str());
899	}
900
901	return true;
902	}
903
904	static void vfs_sysfs_dentry_stats_stat_receiver(int depth, const dentry_t dentry, bool* mountroot, void *data)
905	{
906	sysfs_file_t file = (sysfs_file_t ) data;
907	sysfs_printf(file, fmt: "%*s%s: refcount=%zu%s\n", //
908	depth * `4`, //
909	"", //
910	dentry_name(dentry).c_str(), //
911	dentry->refcount.load(), //
912	mountroot ? " (mount root)" : (dentry->is_mountpoint ? " (mountpoint)" : "") //
913	);
914	}
915
916	static bool vfs_sysfs_dentry_stats(sysfs_file_t *f)
917	{
918	dentry_dump_refstat(dentry: root_dentry, receiver: vfs_sysfs_dentry_stats_stat_receiver, data: f);
919	return true;
920	}
921
922	static sysfs_item_t vfs_sysfs_items[] = {
923	SYSFS_RO_ITEM("filesystems", vfs_sysfs_filesystems),
924	SYSFS_RO_ITEM("mount", vfs_sysfs_mountpoints),
925	SYSFS_RO_ITEM("dentry_stats", vfs_sysfs_dentry_stats),
926	};
927
928	SYSFS_AUTOREGISTER(vfs, vfs_sysfs_items);
929

Browse the source code of MOS/kernel/filesystem/vfs.cpp