vfs.c source code [MOS/kernel/filesystem/vfs.c]

1	// SPDX-License-Identifier: GPL-3.0-or-later
2
3	#include "mos/assert.h"
4	#include "mos/device/timer.h"
5	#include "mos/filesystem/inode.h"
6	#include "mos/filesystem/mount.h"
7	#include "mos/filesystem/page_cache.h"
8	#include "mos/filesystem/sysfs/sysfs.h"
9	#include "mos/filesystem/sysfs/sysfs_autoinit.h"
10	#include "mos/mm/mm.h"
11	#include "mos/mm/mmstat.h"
12	#include "mos/mm/physical/pmm.h"
13	#include "mos/mm/slab_autoinit.h"
14
15	#include <dirent.h>
16	#include <errno.h>
17	#include <mos/filesystem/dentry.h>
18	#include <mos/filesystem/fs_types.h>
19	#include <mos/filesystem/vfs.h>
20	#include <mos/filesystem/vfs_types.h>
21	#include <mos/io/io.h>
22	#include <mos/lib/structures/list.h>
23	#include <mos/lib/structures/tree.h>
24	#include <mos/lib/sync/spinlock.h>
25	#include <mos/mos_global.h>
26	#include <mos/platform/platform.h>
27	#include <mos/syslog/printk.h>
28	#include <mos/tasks/process.h>
29	#include <mos/types.h>
30	#include <mos_stdlib.h>
31	#include <mos_string.h>
32
33	static list_head vfs_fs_list = LIST_HEAD_INIT(vfs_fs_list); // filesystem_t
34	static spinlock_t vfs_fs_list_lock = SPINLOCK_INIT;
35
36	dentry_t *root_dentry = NULL;
37
38	slab_t superblock_cache = NULL, mount_cache = NULL, *file_cache = NULL;
39
40	SLAB_AUTOINIT("superblock", superblock_cache, superblock_t);
41	SLAB_AUTOINIT("mount", mount_cache, mount_t);
42	SLAB_AUTOINIT("file", file_cache, file_t);
43
44	static long do_pagecache_flush(file_t *file, off_t pgoff, size_t npages)
45	{
46	pr_dinfo2(vfs, "vfs: flushing page cache for file %pio", (void *) &file->io);
47
48	mutex_acquire(mutex: &file->dentry->inode->cache.lock);
49	long ret = `0`;
50	if (pgoff == `0` && npages == (size_t) -`1`)
51	ret = pagecache_flush_or_drop_all(icache: &file->dentry->inode->cache, drop_page: false);
52	else
53	ret = pagecache_flush_or_drop(icache: &file->dentry->inode->cache, pgoff, npages, drop_page: false);
54
55	mutex_release(mutex: &file->dentry->inode->cache.lock);
56	return ret;
57	}
58
59	static long do_sync_inode(file_t *file)
60	{
61	const superblock_ops_t *ops = file->dentry->inode->superblock->ops;
62	if (ops && ops->sync_inode)
63	return ops->sync_inode(file->dentry->inode);
64
65	return `0`;
66	}
67
68	// BEGIN: filesystem's io_t operations
69	static void vfs_io_ops_close(io_t *io)
70	{
71	file_t *file = container_of(io, file_t, io);
72	if (io->type == IO_FILE && io->flags & IO_WRITABLE) // only flush if the file is writable
73	{
74	do_pagecache_flush(file, pgoff: `0`, npages: (off_t) -`1`);
75	do_sync_inode(file);
76	}
77
78	dentry_unref(dentry: file->dentry);
79
80	if (io->type == IO_FILE)
81	{
82	const file_ops_t *file_ops = file_get_ops(file);
83	if (file_ops)
84	{
85	if (file_ops->release)
86	file_ops->release(file);
87	}
88	}
89
90	kfree(ptr: file);
91	}
92
93	static void vfs_io_ops_close_dir(io_t *io)
94	{
95	file_t *file = container_of(io, file_t, io);
96
97	if (file->private_data)
98	{
99	vfs_listdir_state_t *state = file->private_data;
100	list_foreach(vfs_listdir_entry_t, entry, state->entries)
101	{
102	list_remove(entry);
103	kfree(ptr: entry->name);
104	kfree(ptr: entry);
105	}
106
107	kfree(ptr: state);
108	file->private_data = NULL;
109	}
110
111	vfs_io_ops_close(io); // close the file
112	}
113
114	static size_t vfs_io_ops_read(io_t io, void* *buf, size_t count)
115	{
116	file_t *file = container_of(io, file_t, io);
117	const file_ops_t *const file_ops = file_get_ops(file);
118	if (!file_ops \|\| !file_ops->read)
119	return `0`;
120
121	spinlock_acquire(&file->offset_lock);
122	size_t ret = file_ops->read(file, buf, count, file->offset);
123	if (IS_ERR_VALUE(ret))
124	; // do nothing
125	else if (ret != (size_t) -`1`)
126	file->offset += ret;
127	spinlock_release(&file->offset_lock);
128
129	return ret;
130	}
131
132	static size_t vfs_io_ops_write(io_t io, const* void *buf, size_t count)
133	{
134	file_t *file = container_of(io, file_t, io);
135	const file_ops_t *const file_ops = file_get_ops(file);
136	if (!file_ops \|\| !file_ops->write)
137	return `0`;
138
139	spinlock_acquire(&file->offset_lock);
140	size_t ret = file_ops->write(file, buf, count, file->offset);
141	if (!IS_ERR_VALUE(ret))
142	file->offset += ret;
143	spinlock_release(&file->offset_lock);
144	return ret;
145	}
146
147	static off_t vfs_io_ops_seek(io_t *io, off_t offset, io_seek_whence_t whence)
148	{
149	file_t *file = container_of(io, file_t, io);
150
151	const file_ops_t *const ops = file_get_ops(file);
152	if (ops->seek)
153	return ops->seek(file, offset, whence); // use the filesystem's lseek if it exists
154
155	spinlock_acquire(&file->offset_lock);
156
157	switch (whence)
158	{
159	case IO_SEEK_SET:
160	{
161	file->offset = MAX(offset, `0`);
162	break;
163	}
164	case IO_SEEK_CURRENT:
165	{
166	off_t new_offset = file->offset + offset;
167	new_offset = MAX(new_offset, `0`);
168	file->offset = new_offset;
169	break;
170	}
171	case IO_SEEK_END:
172	{
173	off_t new_offset = file->dentry->inode->size + offset;
174	new_offset = MAX(new_offset, `0`);
175	file->offset = new_offset;
176	break;
177	}
178	case IO_SEEK_DATA: mos_warn("vfs: IO_SEEK_DATA is not supported"); break;
179	case IO_SEEK_HOLE: mos_warn("vfs: IO_SEEK_HOLE is not supported"); break;
180	};
181
182	spinlock_release(&file->offset_lock);
183	return file->offset;
184	}
185
186	static vmfault_result_t vfs_fault_handler(vmap_t vmap, ptr_t fault_addr, pagefault_t info)
187	{
188	MOS_ASSERT(vmap->io);
189	file_t *file = container_of(vmap->io, file_t, io);
190	const size_t fault_pgoffset = (vmap->io_offset + ALIGN_DOWN_TO_PAGE(fault_addr) - vmap->vaddr) / MOS_PAGE_SIZE;
191
192	mutex_acquire(mutex: &file->dentry->inode->cache.lock); // lock the inode cache
193	phyframe_t *const pagecache_page = pagecache_get_page_for_read(cache: &file->dentry->inode->cache, pgoff: fault_pgoffset);
194	mutex_release(mutex: &file->dentry->inode->cache.lock);
195
196	if (IS_ERR(ptr: pagecache_page))
197	return VMFAULT_CANNOT_HANDLE;
198
199	// ! mm subsystem has verified that this vmap can be written to, but in the page table it's marked as read-only
200	// currently, only CoW pages have this property, we treat this as a CoW page*
201	if (info->is_present && info->is_write)
202	{
203	if (pagecache_page == info->faulting_page)
204	vmap_stat_dec(vmap, pagecache); // the faulting page is a pagecache page
205	else
206	vmap_stat_dec(vmap, cow); // the faulting page is a COW page
207	vmap_stat_inc(vmap, regular);
208	return mm_resolve_cow_fault(vmap, fault_addr, info); // resolve by copying data page into prevate page
209	}
210
211	info->backing_page = pagecache_page;
212	if (vmap->type == VMAP_TYPE_PRIVATE)
213	{
214	if (info->is_write)
215	{
216	vmap_stat_inc(vmap, regular);
217	// present pages are handled above
218	MOS_ASSERT(!info->is_present);
219	return VMFAULT_COPY_BACKING_PAGE; // copy and (also) map the backing page
220	}
221	else
222	{
223	vmap_stat_inc(vmap, pagecache);
224	vmap_stat_inc(vmap, cow);
225	return VMFAULT_MAP_BACKING_PAGE_RO;
226	}
227	}
228	else
229	{
230	vmap_stat_inc(vmap, pagecache);
231	vmap_stat_inc(vmap, regular);
232	return VMFAULT_MAP_BACKING_PAGE;
233	}
234	}
235
236	static bool vfs_io_ops_mmap(io_t io, vmap_t vmap, off_t offset)
237	{
238	file_t *file = container_of(io, file_t, io);
239	const file_ops_t *const file_ops = file_get_ops(file);
240
241	MOS_ASSERT(!vmap->on_fault); // there should be no fault handler set
242	vmap->on_fault = vfs_fault_handler;
243
244	if (file_ops->mmap)
245	return file_ops->mmap(file, vmap, offset);
246
247	return true;
248	}
249
250	static bool vfs_io_ops_munmap(io_t io, vmap_t vmap, bool *unmapped)
251	{
252	file_t *file = container_of(io, file_t, io);
253	const file_ops_t *const file_ops = file_get_ops(file);
254
255	if (file_ops->munmap)
256	return file_ops->munmap(file, vmap, unmapped);
257
258	return true;
259	}
260
261	static void vfs_io_ops_getname(const io_t io, char* *buf, size_t size)
262	{
263	const file_t *file = container_of(io, file_t, io);
264	dentry_path(dentry: file->dentry, root: root_dentry, buf, size);
265	}
266
267	static const io_op_t file_io_ops = {
268	.read = vfs_io_ops_read,
269	.write = vfs_io_ops_write,
270	.close = vfs_io_ops_close,
271	.seek = vfs_io_ops_seek,
272	.mmap = vfs_io_ops_mmap,
273	.munmap = vfs_io_ops_munmap,
274	.get_name = vfs_io_ops_getname,
275	};
276
277	static const io_op_t dir_io_ops = {
278	.read = vfs_list_dir,
279	.close = vfs_io_ops_close_dir,
280	.get_name = vfs_io_ops_getname,
281	};
282
283	// END: filesystem's io_t operations
284
/**
 * Body of the (currently disabled) periodic flusher thread: wakes up every
 * 10 seconds. The actual flush call is still commented out.
 */
static void vfs_flusher_entry(void *arg)
{
    MOS_UNUSED(arg);

    for (;;)
    {
        timer_msleep(10 * 1000);
        // pagecache_flush_all();
    }
}
294
/**
 * KTHREAD-stage init hook for the flusher. Intentionally a no-op for now:
 * creation of the flusher thread is disabled.
 */
static void vfs_flusher_init(void)
{
    // kthread_create(vfs_flusher_entry, NULL, "vfs_flusher");
}

MOS_INIT(KTHREAD, vfs_flusher_init);
300
301	static void vfs_copy_stat(file_stat_t statbuf, inode_t inode)
302	{
303	statbuf->ino = inode->ino;
304	statbuf->type = inode->type;
305	statbuf->perm = inode->perm;
306	statbuf->size = inode->size;
307	statbuf->uid = inode->uid;
308	statbuf->gid = inode->gid;
309	statbuf->sticky = inode->sticky;
310	statbuf->suid = inode->suid;
311	statbuf->sgid = inode->sgid;
312	statbuf->nlinks = inode->nlinks;
313	statbuf->accessed = inode->accessed;
314	statbuf->modified = inode->modified;
315	statbuf->created = inode->created;
316	}
317
318	static filesystem_t vfs_find_filesystem(const* char *name)
319	{
320	filesystem_t *fs_found = NULL;
321	spinlock_acquire(&vfs_fs_list_lock);
322	list_foreach(filesystem_t, fs, vfs_fs_list)
323	{
324	if (strcmp(str1: fs->name, str2: name) == `0`)
325	{
326	fs_found = fs;
327	break;
328	}
329	}
330	spinlock_release(&vfs_fs_list_lock);
331	return fs_found;
332	}
333
334	static bool vfs_verify_permissions(dentry_t file_dentry, bool* open, bool read, bool create, bool execute, bool write)
335	{
336	MOS_ASSERT(file_dentry && file_dentry->inode);
337	const file_perm_t file_perm = file_dentry->inode->perm;
338
339	// TODO: we are treating all users as root for now, only checks for execute permission
340	MOS_UNUSED(open);
341	MOS_UNUSED(read);
342	MOS_UNUSED(create);
343	MOS_UNUSED(write);
344
345	if (execute && !(file_perm & PERM_EXEC))
346	return false; // execute permission denied
347
348	return true;
349	}
350
351	static file_t vfs_do_open(dentry_t base, const char *path, open_flags flags)
352	{
353	if (base == NULL)
354	return NULL;
355
356	const bool may_create = flags & OPEN_CREATE;
357	const bool read = flags & OPEN_READ;
358	const bool write = flags & OPEN_WRITE;
359	const bool exec = flags & OPEN_EXECUTE;
360	const bool no_follow = flags & OPEN_NO_FOLLOW;
361	const bool expect_dir = flags & OPEN_DIR;
362	const bool truncate = flags & OPEN_TRUNCATE;
363
364	lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_FILE \| //
365	(no_follow ? RESOLVE_SYMLINK_NOFOLLOW : `0`) \| //
366	(may_create ? RESOLVE_EXPECT_ANY_EXIST : RESOLVE_EXPECT_EXIST) \| //
367	(expect_dir ? RESOLVE_EXPECT_DIR : `0`);
368	dentry_t *entry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: resolve_flags);
369	if (IS_ERR(ptr: entry))
370	{
371	pr_dinfo2(vfs, "failed to resolve '%s': create=%d, r=%d, x=%d, nofollow=%d, dir=%d, truncate=%d", path, may_create, read, exec, no_follow, expect_dir, truncate);
372	return ERR(ptr: entry);
373	}
374
375	bool created = false;
376
377	if (may_create && entry->inode == NULL)
378	{
379	dentry_t *parent = dentry_parent(dentry: entry);
380	if (!parent->inode->ops->newfile)
381	{
382	dentry_unref(dentry: entry);
383	return ERR_PTR(error: -EROFS);
384	}
385
386	if (!parent->inode->ops->newfile(parent->inode, entry, FILE_TYPE_REGULAR, `0666`))
387	{
388	dentry_unref(dentry: entry);
389	return ERR_PTR(error: -EIO); // failed to create file
390	}
391
392	created = true;
393	}
394
395	if (!vfs_verify_permissions(file_dentry: entry, open: true, read, create: may_create, execute: exec, write))
396	{
397	dentry_unref(dentry: entry);
398	return ERR_PTR(error: -EACCES);
399	}
400
401	file_t *file = vfs_do_open_dentry(entry, created, read, write, exec, truncate);
402	if (IS_ERR(ptr: file))
403	{
404	kfree(ptr: file);
405	dentry_unref(dentry: entry);
406	return ERR(ptr: file);
407	}
408
409	return file;
410	}
411
412	// public functions
413	file_t vfs_do_open_dentry(dentry_t entry, bool created, bool read, bool write, bool exec, bool truncate)
414	{
415	MOS_ASSERT(entry->inode);
416	MOS_UNUSED(truncate);
417
418	file_t *file = kmalloc(file_cache);
419	file->dentry = entry;
420
421	io_flags_t io_flags = IO_SEEKABLE;
422
423	if (read)
424	io_flags \|= IO_READABLE;
425
426	if (write)
427	io_flags \|= IO_WRITABLE;
428
429	if (exec)
430	io_flags \|= IO_EXECUTABLE;
431
432	// only regular files are mmapable
433	if (entry->inode->type == FILE_TYPE_REGULAR)
434	io_flags \|= IO_MMAPABLE;
435
436	if (file->dentry->inode->type == FILE_TYPE_DIRECTORY)
437	io_init(io: &file->io, type: IO_DIR, flags: (io_flags \| IO_READABLE) & ~IO_SEEKABLE, ops: &dir_io_ops);
438	else
439	io_init(io: &file->io, type: IO_FILE, flags: io_flags, ops: &file_io_ops);
440
441	const file_ops_t *ops = file_get_ops(file);
442	if (ops && ops->open)
443	{
444	bool opened = ops->open(file->dentry->inode, file, created);
445	if (!opened)
446	return ERR_PTR(error: -ENOTSUP);
447	}
448
449	return file;
450	}
451
452	void vfs_register_filesystem(filesystem_t *fs)
453	{
454	if (vfs_find_filesystem(name: fs->name))
455	mos_panic("filesystem '%s' already registered", fs->name);
456
457	MOS_ASSERT(list_is_empty(list_node(fs)));
458
459	spinlock_acquire(&vfs_fs_list_lock);
460	list_node_append(head: &vfs_fs_list, list_node(fs));
461	spinlock_release(&vfs_fs_list_lock);
462
463	pr_dinfo2(vfs, "filesystem '%s' registered", fs->name);
464	}
465
466	long vfs_mount(const char device, const* char path, const* char fs, const* char *options)
467	{
468	filesystem_t *real_fs = vfs_find_filesystem(name: fs);
469	if (unlikely(real_fs == NULL))
470	{
471	mos_warn("filesystem '%s' not found", fs);
472	return -EINVAL;
473	}
474
475	MOS_ASSERT_X(real_fs->mount, "filesystem '%s' does not support mounting", real_fs->name);
476
477	if (unlikely(strcmp(path, "/") == `0`))
478	{
479	// special case: mount root filesystem
480	if (root_dentry)
481	{
482	pr_warn("root filesystem is already mounted");
483	return -EBUSY;
484	}
485	pr_dinfo2(vfs, "mounting root filesystem '%s'...", fs);
486	root_dentry = real_fs->mount(real_fs, device, options);
487	if (root_dentry == NULL)
488	{
489	mos_warn("failed to mount root filesystem");
490	return -EIO;
491	}
492	pr_dinfo2(vfs, "root filesystem mounted, dentry=%p", (void *) root_dentry);
493
494	MOS_ASSERT(root_dentry->name == NULL);
495	bool mounted = dentry_mount(mountpoint: root_dentry, root: root_dentry, fs: real_fs);
496	MOS_ASSERT(mounted);
497
498	return `0`;
499	}
500
501	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
502	dentry_t *mountpoint = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR \| RESOLVE_EXPECT_EXIST);
503	if (IS_ERR(ptr: mountpoint))
504	return PTR_ERR(ptr: mountpoint);
505
506	if (mountpoint->is_mountpoint)
507	{
508	// we don't support overlaying filesystems yet
509	mos_warn("mount point is already mounted");
510	dentry_unref(dentry: mountpoint);
511	return -ENOTSUP;
512	}
513
514	// when mounting:
515	// mounted_root will have a reference of 1
516	// the mount_point will have its reference incremented by 1
517	dentry_t *mounted_root = real_fs->mount(real_fs, device, options);
518	if (IS_ERR(ptr: mounted_root))
519	{
520	mos_warn("failed to mount filesystem");
521	return PTR_ERR(ptr: mounted_root);
522	}
523
524	const bool mounted = dentry_mount(mountpoint, root: mounted_root, fs: real_fs);
525	if (unlikely(!mounted))
526	{
527	mos_warn("failed to mount filesystem");
528	return -EIO;
529	}
530
531	MOS_ASSERT_X(mountpoint->refcount == mounted_root->refcount, "mountpoint refcount=%zu, mounted_root refcount=%zu", mountpoint->refcount, mounted_root->refcount);
532	pr_dinfo2(vfs, "mounted filesystem '%s' on '%s'", fs, path);
533	return `0`;
534	}
535
536	long vfs_unmount(const char *path)
537	{
538	dentry_t *mounted_root = dentry_resolve(starting_dir: root_dentry, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_DIR \| RESOLVE_EXPECT_EXIST);
539	if (IS_ERR(ptr: mounted_root))
540	return PTR_ERR(ptr: mounted_root);
541
542	// the mounted root itself holds a ref, and the caller of this function
543	if (mounted_root->refcount != `2`)
544	{
545	dentry_check_refstat(dentry: mounted_root);
546	mos_warn("refcount is not as expected");
547	return -EBUSY;
548	}
549
550	dentry_unref(dentry: mounted_root); // release the reference held by this function
551
552	// unmounting root filesystem
553	dentry_t *mountpoint = dentry_unmount(root: mounted_root);
554	if (!mountpoint)
555	{
556	mos_warn("failed to unmount filesystem");
557	return -EIO;
558	}
559
560	MOS_ASSERT(mounted_root->refcount == mountpoint->refcount && mountpoint->refcount == `1`);
561	if (mounted_root->superblock->fs->unmount)
562	mounted_root->superblock->fs->unmount(mounted_root->superblock->fs, mounted_root);
563	else
564	MOS_ASSERT(dentry_unref_one_norelease(mounted_root));
565	MOS_ASSERT_X(mounted_root->refcount == `0`, "fs->umount should release the last reference to the mounted root");
566
567	if (mounted_root == root_dentry)
568	{
569	pr_info2("unmounted root filesystem");
570	root_dentry = NULL;
571	return `0`;
572	}
573
574	dentry_unref(dentry: mountpoint);
575	return `0`;
576	}
577
578	file_t vfs_openat(int* fd, const char *path, open_flags flags)
579	{
580	pr_dinfo2(vfs, "vfs_openat(fd=%d, path='%s', flags=%x)", fd, path, flags);
581	dentry_t *basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd);
582	if (IS_ERR(ptr: basedir))
583	return ERR(ptr: basedir);
584	file_t *file = vfs_do_open(base: basedir, path, flags);
585	return file;
586	}
587
588	long vfs_fstatat(fd_t fd, const char path, file_stat_t restrict statbuf, fstatat_flags flags)
589	{
590	if (flags & FSTATAT_FILE)
591	{
592	pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%p', stat=%p, flags=%x)", fd, (void ) path, (void* *) statbuf, flags);
593	io_t *io = process_get_fd(current_process, fd);
594	if (!(io_valid(io) && (io->type == IO_FILE \|\| io->type == IO_DIR)))
595	return -EBADF; // io is closed, or is not a file or directory
596
597	file_t *file = container_of(io, file_t, io);
598	MOS_ASSERT(file);
599	if (statbuf)
600	vfs_copy_stat(statbuf, inode: file->dentry->inode);
601
602	return `0`;
603	}
604
605	pr_dinfo2(vfs, "vfs_fstatat(fd=%d, path='%s', stat=%p, flags=%x)", fd, path, (void *) statbuf, flags);
606	dentry_t *basedir = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd);
607	if (IS_ERR(ptr: basedir))
608	return PTR_ERR(ptr: basedir);
609	lastseg_resolve_flags_t resolve_flags = RESOLVE_EXPECT_ANY_TYPE \| RESOLVE_EXPECT_EXIST;
610	if (flags & FSTATAT_NOFOLLOW)
611	resolve_flags \|= RESOLVE_SYMLINK_NOFOLLOW;
612
613	dentry_t *dentry = dentry_resolve(starting_dir: basedir, root_dir: root_dentry, path, flags: resolve_flags);
614	if (IS_ERR(ptr: dentry))
615	return PTR_ERR(ptr: dentry);
616
617	if (statbuf)
618	vfs_copy_stat(statbuf, inode: dentry->inode);
619	dentry_unref(dentry);
620	return `0`;
621	}
622
623	size_t vfs_readlinkat(fd_t dirfd, const char path, char* *buf, size_t size)
624	{
625	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd);
626	dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_SYMLINK_NOFOLLOW \| RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_FILE);
627	if (IS_ERR(ptr: dentry))
628	return PTR_ERR(ptr: dentry);
629
630	if (dentry->inode->type != FILE_TYPE_SYMLINK)
631	{
632	dentry_unref(dentry);
633	return -EINVAL;
634	}
635
636	const size_t len = dentry->inode->ops->readlink(dentry, buf, size);
637
638	dentry_unref(dentry);
639
640	if (len >= size) // buffer too small
641	return -ENAMETOOLONG;
642
643	return len;
644	}
645
646	long vfs_symlink(const char path, const* char *target)
647	{
648	pr_dinfo2(vfs, "vfs_symlink(path='%s', target='%s')", path, target);
649	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
650	dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST);
651	if (IS_ERR(ptr: dentry))
652	return PTR_ERR(ptr: dentry);
653
654	dentry_t *parent_dir = dentry_parent(dentry);
655	const bool created = parent_dir->inode->ops->symlink(parent_dir->inode, dentry, target);
656
657	if (!created)
658	mos_warn("failed to create symlink '%s'", path);
659
660	dentry_unref(dentry);
661	return created ? `0` : -EIO;
662	}
663
664	long vfs_mkdir(const char *path)
665	{
666	pr_dinfo2(vfs, "vfs_mkdir('%s')", path);
667	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
668	dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_NONEXIST);
669	if (IS_ERR(ptr: dentry))
670	return PTR_ERR(ptr: dentry);
671
672	dentry_t *parent_dir = dentry_parent(dentry);
673	if (parent_dir->inode == NULL \|\| parent_dir->inode->ops == NULL \|\| parent_dir->inode->ops->mkdir == NULL)
674	{
675	dentry_unref(dentry);
676	return false;
677	}
678
679	// TODO: use umask or something else
680	const bool created = parent_dir->inode->ops->mkdir(parent_dir->inode, dentry, parent_dir->inode->perm);
681
682	if (!created)
683	mos_warn("failed to create directory '%s'", path);
684
685	dentry_unref(dentry);
686	return created ? `0` : -EIO;
687	}
688
689	long vfs_rmdir(const char *path)
690	{
691	pr_dinfo2(vfs, "vfs_rmdir('%s')", path);
692	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(AT_FDCWD);
693	dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_DIR);
694	if (IS_ERR(ptr: dentry))
695	return PTR_ERR(ptr: dentry);
696
697	dentry_t *parent_dir = dentry_parent(dentry);
698	if (parent_dir->inode == NULL \|\| parent_dir->inode->ops == NULL \|\| parent_dir->inode->ops->rmdir == NULL)
699	{
700	dentry_unref(dentry);
701	return -ENOTSUP;
702	}
703
704	const bool removed = parent_dir->inode->ops->rmdir(parent_dir->inode, dentry);
705
706	if (!removed)
707	mos_warn("failed to remove directory '%s'", path);
708
709	dentry_unref(dentry);
710	return removed ? `0` : -EIO;
711	}
712
713	size_t vfs_list_dir(io_t io, void* *user_buf, size_t user_size)
714	{
715	pr_dinfo2(vfs, "vfs_list_dir(io=%p, buf=%p, size=%zu)", (void ) io, (void* *) user_buf, user_size);
716	file_t *file = container_of(io, file_t, io);
717	if (unlikely(file->dentry->inode->type != FILE_TYPE_DIRECTORY))
718	{
719	mos_warn("not a directory");
720	return `0`;
721	}
722
723	if (file->private_data == NULL)
724	{
725	vfs_listdir_state_t *const state = file->private_data = kmalloc(sizeof(vfs_listdir_state_t));
726	linked_list_init(head_node: &state->entries);
727	state->n_count = state->read_offset = `0`;
728	vfs_populate_listdir_buf(dir: file->dentry, state);
729	}
730
731	vfs_listdir_state_t *const state = file->private_data;
732
733	if (state->read_offset >= state->n_count)
734	return `0`; // no more entries
735
736	size_t bytes_copied = `0`;
737	size_t i = `0`;
738	list_foreach(vfs_listdir_entry_t, entry, state->entries)
739	{
740	if (i++ < state->read_offset)
741	continue; // skip the entries we have already read
742
743	if (state->read_offset >= state->n_count)
744	break;
745
746	const size_t entry_size = sizeof(ino_t) + sizeof(off_t) + sizeof(short) + sizeof(char) + entry->name_len + `1`; // +1 for the null terminator
747	if (bytes_copied + entry_size > user_size)
748	break;
749
750	struct dirent dirent = (struct* dirent ) (((char* *) user_buf) + bytes_copied);
751	dirent->d_ino = entry->ino;
752	dirent->d_type = entry->type;
753	dirent->d_reclen = entry_size;
754	dirent->d_off = entry_size - `1`;
755	memcpy(dest: dirent->d_name, src: entry->name, n: entry->name_len);
756	dirent->d_name[entry->name_len] = `'\0'`;
757	bytes_copied += entry_size;
758	state->read_offset++;
759	}
760
761	return bytes_copied;
762	}
763
764	long vfs_chdirat(fd_t dirfd, const char *path)
765	{
766	pr_dinfo2(vfs, "vfs_chdir('%s')", path);
767	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd);
768	dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_DIR);
769	if (IS_ERR(ptr: dentry))
770	return PTR_ERR(ptr: dentry);
771
772	dentry_t *old_cwd = dentry_from_fd(AT_FDCWD);
773	if (old_cwd)
774	dentry_unref(dentry: old_cwd);
775
776	current_process->working_directory = dentry;
777	return `0`;
778	}
779
780	ssize_t vfs_getcwd(char *buf, size_t size)
781	{
782	dentry_t *cwd = dentry_from_fd(AT_FDCWD);
783	if (IS_ERR(ptr: cwd))
784	return PTR_ERR(ptr: cwd);
785
786	return dentry_path(dentry: cwd, root: root_dentry, buf, size);
787	}
788
789	long vfs_fchmodat(fd_t fd, const char path, int* perm, int flags)
790	{
791	pr_dinfo2(vfs, "vfs_fchmodat(fd=%d, path='%s', perm=%o, flags=%x)", fd, path, perm, flags);
792	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd);
793	dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_ANY_TYPE);
794	if (IS_ERR(ptr: dentry))
795	return PTR_ERR(ptr: dentry);
796
797	// TODO: check if the underlying filesystem supports chmod, and is not read-only
798	dentry->inode->perm = perm;
799	dentry_unref(dentry);
800	return `0`;
801	}
802
803	long vfs_unlinkat(fd_t dirfd, const char *path)
804	{
805	pr_dinfo2(vfs, "vfs_unlinkat(dirfd=%d, path='%s')", dirfd, path);
806	dentry_t *base = path_is_absolute(path) ? root_dentry : dentry_from_fd(fd: dirfd);
807	dentry_t *dentry = dentry_resolve(starting_dir: base, root_dir: root_dentry, path, flags: RESOLVE_EXPECT_EXIST \| RESOLVE_EXPECT_FILE \| RESOLVE_SYMLINK_NOFOLLOW);
808	if (IS_ERR(ptr: dentry))
809	return PTR_ERR(ptr: dentry);
810
811	dentry_t *parent_dir = dentry_parent(dentry);
812	if (parent_dir->inode == NULL \|\| parent_dir->inode->ops == NULL \|\| parent_dir->inode->ops->unlink == NULL)
813	{
814	dentry_unref(dentry);
815	return -ENOTSUP;
816	}
817
818	if (!inode_unlink(dir: parent_dir->inode, dentry))
819	{
820	dentry_unref(dentry);
821	return -EIO;
822	}
823
824	dentry_unref(dentry); // it won't release dentry because dentry->inode is still valid
825	dentry_detach(dentry);
826	dentry_try_release(dentry);
827	return `0`;
828	}
829
830	long vfs_fsync(io_t io, bool* sync_metadata, off_t start, off_t end)
831	{
832	pr_dinfo2(vfs, "vfs_fsync(io=%p, sync_metadata=%d, start=%ld, end=%ld)", (void *) io, sync_metadata, start, end);
833	file_t *file = container_of(io, file_t, io);
834
835	const off_t nbytes = end - start;
836	const off_t npages = ALIGN_UP_TO_PAGE(nbytes) / MOS_PAGE_SIZE;
837	const off_t pgoffset = start / MOS_PAGE_SIZE;
838
839	long ret = do_pagecache_flush(file, pgoff: pgoffset, npages);
840	if (ret < `0`)
841	return ret;
842
843	if (sync_metadata)
844	{
845	ret = do_sync_inode(file);
846	if (ret < `0`)
847	return ret;
848	}
849
850	return ret;
851	}
852
853	// ! sysfs support
854
855	static bool vfs_sysfs_filesystems(sysfs_file_t *f)
856	{
857	list_foreach(filesystem_t, fs, vfs_fs_list)
858	{
859	sysfs_printf(file: f, fmt: "%s\n", fs->name);
860	}
861
862	return true;
863	}
864
865	static bool vfs_sysfs_mountpoints(sysfs_file_t *f)
866	{
867	char pathbuf[MOS_PATH_MAX_LENGTH];
868	list_foreach(mount_t, mp, vfs_mountpoint_list)
869	{
870	dentry_path(dentry: mp->mountpoint, root: root_dentry, buf: pathbuf, size: sizeof(pathbuf));
871	sysfs_printf(file: f, fmt: "%-20s %-10s\n", pathbuf, mp->fs->name);
872	}
873
874	return true;
875	}
876
877	static void vfs_sysfs_dentry_stats_stat_receiver(int depth, const dentry_t dentry, bool* mountroot, void *data)
878	{
879	sysfs_file_t *file = data;
880	sysfs_printf(file, fmt: "%*s%s: refcount=%zu%s\n", //
881	depth * `4`, //
882	"", //
883	dentry_name(dentry), //
884	dentry->refcount, //
885	mountroot ? " (mount root)" : (dentry->is_mountpoint ? " (mountpoint)" : "") //
886	);
887	}
888
889	static bool vfs_sysfs_dentry_stats(sysfs_file_t *f)
890	{
891	dentry_dump_refstat(dentry: root_dentry, receiver: vfs_sysfs_dentry_stats_stat_receiver, data: f);
892	return true;
893	}
894
895	static sysfs_item_t vfs_sysfs_items[] = {
896	SYSFS_RO_ITEM("filesystems", vfs_sysfs_filesystems),
897	SYSFS_RO_ITEM("mount", vfs_sysfs_mountpoints),
898	SYSFS_RO_ITEM("dentry_stats", vfs_sysfs_dentry_stats),
899	};
900
901	SYSFS_AUTOREGISTER(vfs, vfs_sysfs_items);
902

Browse the source code of MOS/kernel/filesystem/vfs.c