1 | // SPDX-License-Identifier: GPL-3.0-or-later |
2 | |
3 | #include "mos/lib/sync/spinlock.h" |
4 | #include "mos/mm/physical/buddy.h" |
5 | #include "mos/mm/physical/pmm.h" |
6 | #include "mos/syslog/printk.h" |
7 | |
8 | #include <mos/lib/structures/list.h> |
9 | #include <mos_stdlib.h> |
10 | |
11 | #define log2(x) \ |
12 | __extension__({ \ |
13 | __extension__ __auto_type _x = (x); \ |
14 | _x ? (sizeof(_x) * 8 - 1 - __builtin_clzl(_x)) : 0; \ |
15 | }) |
16 | #define log2_ceil(x) (log2(x) + (pow2(log2(x)) < x)) |
17 | |
18 | static const size_t orders[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 }; |
19 | static const size_t max_order = MOS_ARRAY_SIZE(orders) - 1; |
20 | |
21 | static struct |
22 | { |
23 | list_head freelists[MOS_ARRAY_SIZE(orders)]; |
24 | } buddy = { 0 }; |
25 | |
26 | static spinlock_t buddy_lock = SPINLOCK_INIT; |
27 | |
28 | static void add_to_freelist(size_t order, phyframe_t *frame) |
29 | { |
30 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
31 | MOS_ASSERT(frame->state == PHYFRAME_FREE); |
32 | frame->order = order; |
33 | list_node_t *frame_node = list_node(frame); |
34 | MOS_ASSERT(list_is_empty(frame_node)); |
35 | |
36 | list_head *const head = &buddy.freelists[order]; |
37 | list_head *node = head->next; |
38 | |
39 | // performance hot spot, even a binary tree would always be faster |
40 | while (node != head && node < frame_node) |
41 | node = node->next; |
42 | |
43 | list_node_insert_before(element: node, item: frame_node); |
44 | } |
45 | |
46 | static pfn_t get_buddy_pfn(size_t page_pfn, size_t order) |
47 | { |
48 | // the address of a block's "buddy" is equal to the XOR of the block's address and the block's size. |
49 | return page_pfn ^ pow2(order); |
50 | } |
51 | |
52 | static void dump_list(size_t order) |
53 | { |
54 | spinlock_acquire(&buddy_lock); |
55 | const list_head *head = &buddy.freelists[order]; |
56 | pr_cont("\nlist of order %zu: " , order); |
57 | list_foreach(phyframe_t, frame, *head) |
58 | { |
59 | if (order == 0) |
60 | pr_cont("[" PFN_FMT "] " , phyframe_pfn(frame)); |
61 | else |
62 | pr_cont(PFN_RANGE " " , phyframe_pfn(frame), phyframe_pfn(frame) + pow2(order) - 1); |
63 | } |
64 | spinlock_release(&buddy_lock); |
65 | } |
66 | |
67 | /** |
68 | * @brief Add [start_pfn, start_pfn + nframes - 1] to the freelist of order `order` |
69 | * |
70 | * @param start_pfn physical frame number of the first frame in the block |
71 | * @param nframes number of frames in the block |
72 | * @param order order of the block, must be in [0, MAX_ORDER] |
73 | */ |
74 | static void populate_freelist(const size_t start_pfn, const size_t nframes, const size_t order) |
75 | { |
76 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
77 | |
78 | const size_t step = pow2(order); |
79 | |
80 | pr_dinfo2(pmm_buddy, " order: %zu, step: %zu" , order, step); |
81 | |
82 | pfn_t current = start_pfn; |
83 | size_t nframes_left = nframes; |
84 | |
85 | for (; current + step <= start_pfn + nframes; current += step) |
86 | { |
87 | phyframe_t *const frame = pfn_phyframe(current); |
88 | linked_list_init(list_node(frame)); |
89 | frame->state = PHYFRAME_FREE; // free or reserved |
90 | |
91 | pr_dinfo2(pmm_buddy, " - " PFN_RANGE, current, current + step - 1); |
92 | frame->order = order; |
93 | add_to_freelist(order, frame); |
94 | nframes_left -= step; |
95 | } |
96 | |
97 | // we have some frames left over, add them to a smaller order |
98 | if (nframes_left) |
99 | populate_freelist(start_pfn: current, nframes: nframes_left, order: order - 1); |
100 | } |
101 | |
102 | static void break_this_pfn(pfn_t this_pfn, size_t this_order) |
103 | { |
104 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
105 | |
106 | phyframe_t *const frame = pfn_phyframe(this_pfn); |
107 | MOS_ASSERT(frame->state == PHYFRAME_FREE); // must be free |
108 | list_remove(frame); |
109 | |
110 | // split this frame into two frames of order-1 |
111 | const pfn_t frame2_pfn = this_pfn + pow2(this_order - 1); // pow2(order) / 2 |
112 | pr_dinfo2(pmm_buddy, " breaking order %zu" PFN_RANGE " -> " PFN_RANGE " and " PFN_RANGE, this_order, this_pfn, this_pfn + pow2(this_order) - 1, this_pfn, |
113 | frame2_pfn - 1, frame2_pfn, frame2_pfn + pow2(this_order - 1) - 1); |
114 | |
115 | phyframe_t *const frame2 = pfn_phyframe(frame2_pfn); |
116 | linked_list_init(list_node(frame2)); // this must not be in any list, so we init it |
117 | frame2->state = frame->state; // which is PHYFRAME_FREE |
118 | |
119 | add_to_freelist(order: this_order - 1, frame); |
120 | add_to_freelist(order: this_order - 1, frame: frame2); |
121 | } |
122 | |
123 | static void (pfn_t start, size_t nframes, enum phyframe_state state) |
124 | { |
125 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
126 | |
127 | size_t last_nframes = 0; |
128 | |
129 | while (nframes) |
130 | { |
131 | if (last_nframes == nframes) |
132 | { |
133 | phyframe_t *frame = pfn_phyframe(start); |
134 | if (state == PHYFRAME_RESERVED && frame->state == PHYFRAME_RESERVED) |
135 | { |
136 | // XXX: A great hack, there may be overlapped reserved ranges. |
137 | MOS_ASSERT(frame->order == 0); |
138 | start++; |
139 | nframes--; |
140 | } |
141 | else |
142 | { |
143 | mos_panic("infinite loop detected" ); |
144 | } |
145 | } |
146 | |
147 | last_nframes = nframes; |
148 | |
149 | MOS_ASSERT_X(start <= pmm_total_frames, "insane!" ); |
150 | pr_dinfo2(pmm_buddy, " extracting, n left: %zu, start: " PFN_FMT, nframes, start); |
151 | |
152 | for (size_t order = max_order; order != (size_t) -1; order--) |
153 | { |
154 | // find if the freelist of this order contains something [start] |
155 | list_head *freelist = &buddy.freelists[order]; |
156 | if (list_is_empty(head: freelist)) |
157 | continue; // no, the frame must be at a lower order |
158 | |
159 | list_foreach(phyframe_t, f, *freelist) |
160 | { |
161 | const pfn_t start_pfn = phyframe_pfn(f); |
162 | const pfn_t end_pfn = start_pfn + pow2(order) - 1; |
163 | |
164 | if (start_pfn == start) |
165 | { |
166 | // so, we found a set of frame that starts with [start], here are the cases: |
167 | // - pow2(order) < nframes: we not only need this frame, but also some more frames |
168 | // - pow2(order) = nframes: we need this frame, and we are done |
169 | // - pow2(order) > nframes: we need this frame, but we need to break it into two smaller frames so that |
170 | // in the next iteration, a more precise subset of this frame can be found |
171 | |
172 | pr_dinfo2(pmm_buddy, " found a frame that starts with " PFN_FMT "..." , start); |
173 | if (pow2(order) <= nframes) |
174 | { |
175 | list_remove(f); |
176 | f->state = state; |
177 | f->order = 0; |
178 | |
179 | nframes -= pow2(order); |
180 | start += pow2(order); |
181 | |
182 | pr_dinfo2(pmm_buddy, " done, n left: %zu, start: " PFN_FMT, nframes, start); |
183 | break; // we're done with the current order |
184 | } |
185 | else |
186 | { |
187 | pr_dinfo2(pmm_buddy, " narrowing down..." ); |
188 | // break this frame into two smaller frames, so that in the next iteration |
189 | // we can find a frame that exactly ends with [start + nframes - 1] |
190 | break_this_pfn(this_pfn: start_pfn, this_order: order); |
191 | break; |
192 | } |
193 | } |
194 | |
195 | if (start_pfn <= start && end_pfn >= start) |
196 | { |
197 | pr_dinfo2(pmm_buddy, " found a frame that contains " PFN_FMT, start); |
198 | // we found a frame that contains [start] |
199 | // so we will break it into two frames, |
200 | // - one of which contains [start], and |
201 | // - the other contains [start + 1, end_pfn] |
202 | |
203 | // the lower order frames will be broken down in the next iteration of this loop |
204 | // until we have a frame that exactly starts with [start] |
205 | break_this_pfn(this_pfn: start_pfn, this_order: order); |
206 | break; |
207 | } |
208 | } |
209 | |
210 | if (nframes == 0) |
211 | break; // fast exit path |
212 | } |
213 | } |
214 | } |
215 | |
216 | static void break_the_order(const size_t order) |
217 | { |
218 | if (order > orders[MOS_ARRAY_SIZE(orders) - 1]) |
219 | return; // we can't break any further |
220 | |
221 | const list_head *freelist = &buddy.freelists[order]; |
222 | if (list_is_empty(head: freelist)) |
223 | break_the_order(order: order + 1); |
224 | |
225 | if (list_is_empty(head: freelist)) |
226 | { |
227 | pr_dinfo2(pmm_buddy, " no free frames of order %zu, can't break" , order); |
228 | return; // out of memory! |
229 | } |
230 | |
231 | phyframe_t *const frame = list_entry(freelist->next, phyframe_t); |
232 | MOS_ASSERT(frame->state == PHYFRAME_FREE); |
233 | list_remove(frame); |
234 | |
235 | const pfn_t frame_pfn = phyframe_pfn(frame); |
236 | const pfn_t frame2_pfn = frame_pfn + pow2(order - 1); // pow2(order) / 2 |
237 | |
238 | pr_dinfo2(pmm_buddy, " breaking order %3zu, " PFN_RANGE " -> " PFN_RANGE " and " PFN_RANGE, order, frame_pfn, frame_pfn + pow2(order) - 1, frame_pfn, frame2_pfn - 1, |
239 | frame2_pfn, frame2_pfn + pow2(order - 1) - 1); |
240 | |
241 | phyframe_t *const frame2 = pfn_phyframe(frame2_pfn); |
242 | linked_list_init(list_node(frame2)); |
243 | frame2->state = PHYFRAME_FREE; |
244 | |
245 | add_to_freelist(order: order - 1, frame); |
246 | add_to_freelist(order: order - 1, frame: frame2); |
247 | } |
248 | |
249 | /** |
250 | * @brief Try finding a buddy frame and merging it with the given frame |
251 | * |
252 | * @param pfn physical frame number |
253 | * @param order order of the frame, given by log2(nframes) |
254 | * @return true if a buddy was found and merged to a higher order freelist |
255 | * @return false if... |
256 | */ |
257 | [[nodiscard]] static bool try_merge(const pfn_t pfn, u8 order) |
258 | { |
259 | if (order > orders[MOS_ARRAY_SIZE(orders) - 1]) |
260 | { |
261 | pr_dinfo2(pmm_buddy, " order %hhu is too large, cannot merge" , order); |
262 | return false; |
263 | } |
264 | |
265 | phyframe_t *const frame = pfn_phyframe(pfn); |
266 | |
267 | const pfn_t buddy_pfn = get_buddy_pfn(page_pfn: pfn, order); |
268 | if (buddy_pfn >= pmm_total_frames) |
269 | return false; |
270 | |
271 | phyframe_t *const buddy = pfn_phyframe(buddy_pfn); |
272 | if (buddy->state != PHYFRAME_FREE) |
273 | { |
274 | pr_dinfo2(pmm_buddy, " buddy pfn " PFN_FMT " is not free for pfn " PFN_FMT ", not merging" , buddy_pfn, pfn); |
275 | return false; |
276 | } |
277 | |
278 | if (buddy->order != order) |
279 | { |
280 | pr_dinfo2(pmm_buddy, " buddy pfn " PFN_FMT " is not the same order (%hhu != %hhu) as " PFN_FMT ", not merging" , buddy_pfn, buddy->order, order, pfn); |
281 | return false; |
282 | } |
283 | |
284 | list_remove(buddy); |
285 | frame->state = PHYFRAME_FREE; |
286 | |
287 | pr_dinfo2(pmm_buddy, " merging order %hhu, " PFN_RANGE " and " PFN_RANGE, order, pfn, pfn + pow2(order) - 1, buddy_pfn, buddy_pfn + pow2(order) - 1); |
288 | |
289 | const size_t high_order_pfn = MIN(pfn, buddy_pfn); // the lower pfn |
290 | |
291 | if (!try_merge(pfn: high_order_pfn, order: order + 1)) |
292 | { |
293 | phyframe_t *const high_order_frame = pfn_phyframe(high_order_pfn); |
294 | linked_list_init(list_node(high_order_frame)); |
295 | high_order_frame->state = PHYFRAME_FREE; |
296 | add_to_freelist(order: order + 1, frame: high_order_frame); // use the lower pfn |
297 | } |
298 | return true; |
299 | } |
300 | |
301 | void buddy_dump_all() |
302 | { |
303 | for (size_t i = 0; i < MOS_ARRAY_SIZE(buddy.freelists); i++) |
304 | dump_list(order: i); |
305 | |
306 | pr_info("" ); |
307 | } |
308 | |
309 | void buddy_init(size_t max_nframes) |
310 | { |
311 | spinlock_acquire(&buddy_lock); |
312 | for (size_t i = 0; i < MOS_ARRAY_SIZE(buddy.freelists); i++) |
313 | { |
314 | pr_dinfo2(pmm_buddy, "init freelist[%zu], order: %zu" , i, orders[i]); |
315 | linked_list_init(head_node: &buddy.freelists[i]); |
316 | } |
317 | |
318 | const size_t order = MIN(log2(max_nframes), orders[MOS_ARRAY_SIZE(orders) - 1]); |
319 | populate_freelist(start_pfn: 0, nframes: max_nframes, order); |
320 | spinlock_release(&buddy_lock); |
321 | } |
322 | |
323 | void buddy_reserve_n(pfn_t pfn, size_t nframes) |
324 | { |
325 | spinlock_acquire(&buddy_lock); |
326 | pr_dinfo2(pmm_buddy, "reserving " PFN_RANGE " (%zu frames)" , pfn, pfn + nframes - 1, nframes); |
327 | extract_exact_range(start: pfn, nframes, state: PHYFRAME_RESERVED); |
328 | spinlock_release(&buddy_lock); |
329 | } |
330 | |
331 | phyframe_t *buddy_alloc_n_exact(size_t nframes) |
332 | { |
333 | spinlock_acquire(&buddy_lock); |
334 | const size_t order = log2_ceil(nframes); |
335 | |
336 | // check if this order is too large |
337 | if (order > orders[MOS_ARRAY_SIZE(orders) - 1]) |
338 | return NULL; |
339 | |
340 | pr_dinfo2(pmm_buddy, "allocating %zu contiguous frames (order %zu, which is %zu frames, wasting %zu frames)" , nframes, order, pow2(order), pow2(order) - nframes); |
341 | |
342 | list_head *free = &buddy.freelists[order]; |
343 | if (list_is_empty(head: free)) |
344 | break_the_order(order: order + 1); |
345 | |
346 | if (unlikely(list_is_empty(free))) |
347 | { |
348 | spinlock_release(&buddy_lock); |
349 | pr_emerg("no free frames of order %zu, can't break" , order); |
350 | pr_emerg("out of memory!" ); |
351 | return NULL; // out of memory! |
352 | } |
353 | |
354 | phyframe_t *const frame = list_entry(free->next, phyframe_t); |
355 | const pfn_t start = phyframe_pfn(frame); |
356 | |
357 | extract_exact_range(start, nframes, state: PHYFRAME_ALLOCATED); // extract the exact range from the buddy.freelists |
358 | |
359 | for (size_t i = 0; i < nframes; i++) |
360 | { |
361 | phyframe_t *const f = pfn_phyframe(start + i); |
362 | f->state = PHYFRAME_ALLOCATED; |
363 | f->order = 0; // so that they can be freed individually |
364 | } |
365 | |
366 | spinlock_release(&buddy_lock); |
367 | return frame; |
368 | } |
369 | |
370 | void buddy_free_n(pfn_t pfn, size_t nframes) |
371 | { |
372 | pr_dinfo2(pmm_buddy, "freeing " PFN_RANGE " (%zu frames)" , pfn, pfn + nframes - 1, nframes); |
373 | spinlock_acquire(&buddy_lock); |
374 | |
375 | phyframe_t *const frame = pfn_phyframe(pfn); |
376 | MOS_ASSERT_X(frame->state == PHYFRAME_ALLOCATED, "frame must be allocated" ); |
377 | MOS_ASSERT(list_is_empty(list_node(frame))); |
378 | frame->state = PHYFRAME_FREE; |
379 | |
380 | const size_t order = log2_ceil(nframes); |
381 | if (!try_merge(pfn, order)) |
382 | add_to_freelist(order, frame); |
383 | |
384 | spinlock_release(&buddy_lock); |
385 | } |
386 | |