1// SPDX-License-Identifier: GPL-3.0-or-later
2
3#include "mos/lib/sync/spinlock.h"
4#include "mos/mm/physical/buddy.h"
5#include "mos/mm/physical/pmm.h"
6#include "mos/syslog/printk.h"
7
8#include <mos/lib/structures/list.h>
9#include <mos_stdlib.h>
10
11#define log2(x) \
12 __extension__({ \
13 __extension__ __auto_type _x = (x); \
14 _x ? (sizeof(_x) * 8 - 1 - __builtin_clzl(_x)) : 0; \
15 })
16#define log2_ceil(x) (log2(x) + (pow2(log2(x)) < x))
17
18static const size_t orders[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 };
19static const size_t max_order = MOS_ARRAY_SIZE(orders) - 1;
20
21static struct
22{
23 list_head freelists[MOS_ARRAY_SIZE(orders)];
24} buddy = { 0 };
25
26static spinlock_t buddy_lock = SPINLOCK_INIT;
27
28static void add_to_freelist(size_t order, phyframe_t *frame)
29{
30 MOS_ASSERT(spinlock_is_locked(&buddy_lock));
31 MOS_ASSERT(frame->state == PHYFRAME_FREE);
32 frame->order = order;
33 list_node_t *frame_node = list_node(frame);
34 MOS_ASSERT(list_is_empty(frame_node));
35
36 list_head *const head = &buddy.freelists[order];
37 list_head *node = head->next;
38
39 // performance hot spot, even a binary tree would always be faster
40 while (node != head && node < frame_node)
41 node = node->next;
42
43 list_node_insert_before(element: node, item: frame_node);
44}
45
46static pfn_t get_buddy_pfn(size_t page_pfn, size_t order)
47{
48 // the address of a block's "buddy" is equal to the XOR of the block's address and the block's size.
49 return page_pfn ^ pow2(order);
50}
51
52static void dump_list(size_t order)
53{
54 spinlock_acquire(&buddy_lock);
55 const list_head *head = &buddy.freelists[order];
56 pr_cont("\nlist of order %zu: ", order);
57 list_foreach(phyframe_t, frame, *head)
58 {
59 if (order == 0)
60 pr_cont("[" PFN_FMT "] ", phyframe_pfn(frame));
61 else
62 pr_cont(PFN_RANGE " ", phyframe_pfn(frame), phyframe_pfn(frame) + pow2(order) - 1);
63 }
64 spinlock_release(&buddy_lock);
65}
66
67/**
68 * @brief Add [start_pfn, start_pfn + nframes - 1] to the freelist of order `order`
69 *
70 * @param start_pfn physical frame number of the first frame in the block
71 * @param nframes number of frames in the block
72 * @param order order of the block, must be in [0, MAX_ORDER]
73 */
74static void populate_freelist(const size_t start_pfn, const size_t nframes, const size_t order)
75{
76 MOS_ASSERT(spinlock_is_locked(&buddy_lock));
77
78 const size_t step = pow2(order);
79
80 pr_dinfo2(pmm_buddy, " order: %zu, step: %zu", order, step);
81
82 pfn_t current = start_pfn;
83 size_t nframes_left = nframes;
84
85 for (; current + step <= start_pfn + nframes; current += step)
86 {
87 phyframe_t *const frame = pfn_phyframe(current);
88 linked_list_init(list_node(frame));
89 frame->state = PHYFRAME_FREE; // free or reserved
90
91 pr_dinfo2(pmm_buddy, " - " PFN_RANGE, current, current + step - 1);
92 frame->order = order;
93 add_to_freelist(order, frame);
94 nframes_left -= step;
95 }
96
97 // we have some frames left over, add them to a smaller order
98 if (nframes_left)
99 populate_freelist(start_pfn: current, nframes: nframes_left, order: order - 1);
100}
101
102static void break_this_pfn(pfn_t this_pfn, size_t this_order)
103{
104 MOS_ASSERT(spinlock_is_locked(&buddy_lock));
105
106 phyframe_t *const frame = pfn_phyframe(this_pfn);
107 MOS_ASSERT(frame->state == PHYFRAME_FREE); // must be free
108 list_remove(frame);
109
110 // split this frame into two frames of order-1
111 const pfn_t frame2_pfn = this_pfn + pow2(this_order - 1); // pow2(order) / 2
112 pr_dinfo2(pmm_buddy, " breaking order %zu" PFN_RANGE " -> " PFN_RANGE " and " PFN_RANGE, this_order, this_pfn, this_pfn + pow2(this_order) - 1, this_pfn,
113 frame2_pfn - 1, frame2_pfn, frame2_pfn + pow2(this_order - 1) - 1);
114
115 phyframe_t *const frame2 = pfn_phyframe(frame2_pfn);
116 linked_list_init(list_node(frame2)); // this must not be in any list, so we init it
117 frame2->state = frame->state; // which is PHYFRAME_FREE
118
119 add_to_freelist(order: this_order - 1, frame);
120 add_to_freelist(order: this_order - 1, frame: frame2);
121}
122
123static void extract_exact_range(pfn_t start, size_t nframes, enum phyframe_state state)
124{
125 MOS_ASSERT(spinlock_is_locked(&buddy_lock));
126
127 size_t last_nframes = 0;
128
129 while (nframes)
130 {
131 if (last_nframes == nframes)
132 {
133 phyframe_t *frame = pfn_phyframe(start);
134 if (state == PHYFRAME_RESERVED && frame->state == PHYFRAME_RESERVED)
135 {
136 // XXX: A great hack, there may be overlapped reserved ranges.
137 MOS_ASSERT(frame->order == 0);
138 start++;
139 nframes--;
140 }
141 else
142 {
143 mos_panic("infinite loop detected");
144 }
145 }
146
147 last_nframes = nframes;
148
149 MOS_ASSERT_X(start <= pmm_total_frames, "insane!");
150 pr_dinfo2(pmm_buddy, " extracting, n left: %zu, start: " PFN_FMT, nframes, start);
151
152 for (size_t order = max_order; order != (size_t) -1; order--)
153 {
154 // find if the freelist of this order contains something [start]
155 list_head *freelist = &buddy.freelists[order];
156 if (list_is_empty(head: freelist))
157 continue; // no, the frame must be at a lower order
158
159 list_foreach(phyframe_t, f, *freelist)
160 {
161 const pfn_t start_pfn = phyframe_pfn(f);
162 const pfn_t end_pfn = start_pfn + pow2(order) - 1;
163
164 if (start_pfn == start)
165 {
166 // so, we found a set of frame that starts with [start], here are the cases:
167 // - pow2(order) < nframes: we not only need this frame, but also some more frames
168 // - pow2(order) = nframes: we need this frame, and we are done
169 // - pow2(order) > nframes: we need this frame, but we need to break it into two smaller frames so that
170 // in the next iteration, a more precise subset of this frame can be found
171
172 pr_dinfo2(pmm_buddy, " found a frame that starts with " PFN_FMT "...", start);
173 if (pow2(order) <= nframes)
174 {
175 list_remove(f);
176 f->state = state;
177 f->order = 0;
178
179 nframes -= pow2(order);
180 start += pow2(order);
181
182 pr_dinfo2(pmm_buddy, " done, n left: %zu, start: " PFN_FMT, nframes, start);
183 break; // we're done with the current order
184 }
185 else
186 {
187 pr_dinfo2(pmm_buddy, " narrowing down...");
188 // break this frame into two smaller frames, so that in the next iteration
189 // we can find a frame that exactly ends with [start + nframes - 1]
190 break_this_pfn(this_pfn: start_pfn, this_order: order);
191 break;
192 }
193 }
194
195 if (start_pfn <= start && end_pfn >= start)
196 {
197 pr_dinfo2(pmm_buddy, " found a frame that contains " PFN_FMT, start);
198 // we found a frame that contains [start]
199 // so we will break it into two frames,
200 // - one of which contains [start], and
201 // - the other contains [start + 1, end_pfn]
202
203 // the lower order frames will be broken down in the next iteration of this loop
204 // until we have a frame that exactly starts with [start]
205 break_this_pfn(this_pfn: start_pfn, this_order: order);
206 break;
207 }
208 }
209
210 if (nframes == 0)
211 break; // fast exit path
212 }
213 }
214}
215
216static void break_the_order(const size_t order)
217{
218 if (order > orders[MOS_ARRAY_SIZE(orders) - 1])
219 return; // we can't break any further
220
221 const list_head *freelist = &buddy.freelists[order];
222 if (list_is_empty(head: freelist))
223 break_the_order(order: order + 1);
224
225 if (list_is_empty(head: freelist))
226 {
227 pr_dinfo2(pmm_buddy, " no free frames of order %zu, can't break", order);
228 return; // out of memory!
229 }
230
231 phyframe_t *const frame = list_entry(freelist->next, phyframe_t);
232 MOS_ASSERT(frame->state == PHYFRAME_FREE);
233 list_remove(frame);
234
235 const pfn_t frame_pfn = phyframe_pfn(frame);
236 const pfn_t frame2_pfn = frame_pfn + pow2(order - 1); // pow2(order) / 2
237
238 pr_dinfo2(pmm_buddy, " breaking order %3zu, " PFN_RANGE " -> " PFN_RANGE " and " PFN_RANGE, order, frame_pfn, frame_pfn + pow2(order) - 1, frame_pfn, frame2_pfn - 1,
239 frame2_pfn, frame2_pfn + pow2(order - 1) - 1);
240
241 phyframe_t *const frame2 = pfn_phyframe(frame2_pfn);
242 linked_list_init(list_node(frame2));
243 frame2->state = PHYFRAME_FREE;
244
245 add_to_freelist(order: order - 1, frame);
246 add_to_freelist(order: order - 1, frame: frame2);
247}
248
249/**
250 * @brief Try finding a buddy frame and merging it with the given frame
251 *
252 * @param pfn physical frame number
253 * @param order order of the frame, given by log2(nframes)
254 * @return true if a buddy was found and merged to a higher order freelist
255 * @return false if...
256 */
257[[nodiscard]] static bool try_merge(const pfn_t pfn, u8 order)
258{
259 if (order > orders[MOS_ARRAY_SIZE(orders) - 1])
260 {
261 pr_dinfo2(pmm_buddy, " order %hhu is too large, cannot merge", order);
262 return false;
263 }
264
265 phyframe_t *const frame = pfn_phyframe(pfn);
266
267 const pfn_t buddy_pfn = get_buddy_pfn(page_pfn: pfn, order);
268 if (buddy_pfn >= pmm_total_frames)
269 return false;
270
271 phyframe_t *const buddy = pfn_phyframe(buddy_pfn);
272 if (buddy->state != PHYFRAME_FREE)
273 {
274 pr_dinfo2(pmm_buddy, " buddy pfn " PFN_FMT " is not free for pfn " PFN_FMT ", not merging", buddy_pfn, pfn);
275 return false;
276 }
277
278 if (buddy->order != order)
279 {
280 pr_dinfo2(pmm_buddy, " buddy pfn " PFN_FMT " is not the same order (%hhu != %hhu) as " PFN_FMT ", not merging", buddy_pfn, buddy->order, order, pfn);
281 return false;
282 }
283
284 list_remove(buddy);
285 frame->state = PHYFRAME_FREE;
286
287 pr_dinfo2(pmm_buddy, " merging order %hhu, " PFN_RANGE " and " PFN_RANGE, order, pfn, pfn + pow2(order) - 1, buddy_pfn, buddy_pfn + pow2(order) - 1);
288
289 const size_t high_order_pfn = MIN(pfn, buddy_pfn); // the lower pfn
290
291 if (!try_merge(pfn: high_order_pfn, order: order + 1))
292 {
293 phyframe_t *const high_order_frame = pfn_phyframe(high_order_pfn);
294 linked_list_init(list_node(high_order_frame));
295 high_order_frame->state = PHYFRAME_FREE;
296 add_to_freelist(order: order + 1, frame: high_order_frame); // use the lower pfn
297 }
298 return true;
299}
300
301void buddy_dump_all()
302{
303 for (size_t i = 0; i < MOS_ARRAY_SIZE(buddy.freelists); i++)
304 dump_list(order: i);
305
306 pr_info("");
307}
308
309void buddy_init(size_t max_nframes)
310{
311 spinlock_acquire(&buddy_lock);
312 for (size_t i = 0; i < MOS_ARRAY_SIZE(buddy.freelists); i++)
313 {
314 pr_dinfo2(pmm_buddy, "init freelist[%zu], order: %zu", i, orders[i]);
315 linked_list_init(head_node: &buddy.freelists[i]);
316 }
317
318 const size_t order = MIN(log2(max_nframes), orders[MOS_ARRAY_SIZE(orders) - 1]);
319 populate_freelist(start_pfn: 0, nframes: max_nframes, order);
320 spinlock_release(&buddy_lock);
321}
322
323void buddy_reserve_n(pfn_t pfn, size_t nframes)
324{
325 spinlock_acquire(&buddy_lock);
326 pr_dinfo2(pmm_buddy, "reserving " PFN_RANGE " (%zu frames)", pfn, pfn + nframes - 1, nframes);
327 extract_exact_range(start: pfn, nframes, state: PHYFRAME_RESERVED);
328 spinlock_release(&buddy_lock);
329}
330
331phyframe_t *buddy_alloc_n_exact(size_t nframes)
332{
333 spinlock_acquire(&buddy_lock);
334 const size_t order = log2_ceil(nframes);
335
336 // check if this order is too large
337 if (order > orders[MOS_ARRAY_SIZE(orders) - 1])
338 return NULL;
339
340 pr_dinfo2(pmm_buddy, "allocating %zu contiguous frames (order %zu, which is %zu frames, wasting %zu frames)", nframes, order, pow2(order), pow2(order) - nframes);
341
342 list_head *free = &buddy.freelists[order];
343 if (list_is_empty(head: free))
344 break_the_order(order: order + 1);
345
346 if (unlikely(list_is_empty(free)))
347 {
348 spinlock_release(&buddy_lock);
349 pr_emerg("no free frames of order %zu, can't break", order);
350 pr_emerg("out of memory!");
351 return NULL; // out of memory!
352 }
353
354 phyframe_t *const frame = list_entry(free->next, phyframe_t);
355 const pfn_t start = phyframe_pfn(frame);
356
357 extract_exact_range(start, nframes, state: PHYFRAME_ALLOCATED); // extract the exact range from the buddy.freelists
358
359 for (size_t i = 0; i < nframes; i++)
360 {
361 phyframe_t *const f = pfn_phyframe(start + i);
362 f->state = PHYFRAME_ALLOCATED;
363 f->order = 0; // so that they can be freed individually
364 }
365
366 spinlock_release(&buddy_lock);
367 return frame;
368}
369
370void buddy_free_n(pfn_t pfn, size_t nframes)
371{
372 pr_dinfo2(pmm_buddy, "freeing " PFN_RANGE " (%zu frames)", pfn, pfn + nframes - 1, nframes);
373 spinlock_acquire(&buddy_lock);
374
375 phyframe_t *const frame = pfn_phyframe(pfn);
376 MOS_ASSERT_X(frame->state == PHYFRAME_ALLOCATED, "frame must be allocated");
377 MOS_ASSERT(list_is_empty(list_node(frame)));
378 frame->state = PHYFRAME_FREE;
379
380 const size_t order = log2_ceil(nframes);
381 if (!try_merge(pfn, order))
382 add_to_freelist(order, frame);
383
384 spinlock_release(&buddy_lock);
385}
386