1 | // SPDX-License-Identifier: GPL-3.0-or-later |
2 | |
3 | #include "mos/assert.hpp" |
4 | #include "mos/lib/sync/spinlock.hpp" |
5 | #include "mos/mm/physical/buddy.hpp" |
6 | #include "mos/mm/physical/pmm.hpp" |
7 | #include "mos/syslog/printk.hpp" |
8 | |
9 | #include <algorithm> |
10 | #include <mos/lib/structures/list.hpp> |
11 | #include <mos_stdlib.hpp> |
12 | |
13 | constexpr size_t pow2(size_t x) |
14 | { |
15 | return 1 << x; |
16 | } |
17 | |
18 | constexpr size_t log2(size_t x) |
19 | { |
20 | return x ? (sizeof(x) * 8 - 1 - __builtin_clzl(x)) : 0; |
21 | } |
22 | |
23 | #define log2_ceil(x) (log2(x) + (pow2(log2(x)) < x)) |
24 | |
25 | static const size_t orders[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 }; |
26 | static const size_t max_order = MOS_ARRAY_SIZE(orders) - 1; |
27 | |
28 | static struct |
29 | { |
30 | list_head freelists[MOS_ARRAY_SIZE(orders)]; |
31 | } buddy; |
32 | |
33 | static spinlock_t buddy_lock; |
34 | |
35 | static void add_to_freelist(size_t order, phyframe_t *frame) |
36 | { |
37 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
38 | MOS_ASSERT(frame->state == phyframe::PHYFRAME_FREE); |
39 | frame->order = order; |
40 | list_node_t *frame_node = list_node(&frame->info); |
41 | MOS_ASSERT(list_is_empty(frame_node)); |
42 | |
43 | list_head *const head = &buddy.freelists[order]; |
44 | list_head *node = head->next; |
45 | |
46 | // performance hot spot, even a binary tree would always be faster |
47 | while (node != head && node < frame_node) |
48 | node = node->next; |
49 | |
50 | list_node_insert_before(element: node, item: frame_node); |
51 | } |
52 | |
53 | static pfn_t get_buddy_pfn(size_t page_pfn, size_t order) |
54 | { |
55 | // the address of a block's "buddy" is equal to the XOR of the block's address and the block's size. |
56 | return page_pfn ^ pow2(x: order); |
57 | } |
58 | |
59 | static void dump_list(size_t order) |
60 | { |
61 | spinlock_acquire(&buddy_lock); |
62 | const list_head *head = &buddy.freelists[order]; |
63 | pr_cont("\nlist of order %zu: " , order); |
64 | list_foreach(phyframe_t::additional_info, info, *head) |
65 | { |
66 | const auto frame = container_of(info, phyframe_t, info); |
67 | |
68 | if (order == 0) |
69 | pr_cont("[" PFN_FMT "] " , phyframe_pfn(frame)); |
70 | else |
71 | pr_cont(PFN_RANGE " " , phyframe_pfn(frame), phyframe_pfn(frame) + pow2(order) - 1); |
72 | } |
73 | spinlock_release(&buddy_lock); |
74 | } |
75 | |
76 | /** |
77 | * @brief Add [start_pfn, start_pfn + nframes - 1] to the freelist of order `order` |
78 | * |
79 | * @param start_pfn physical frame number of the first frame in the block |
80 | * @param nframes number of frames in the block |
81 | * @param order order of the block, must be in [0, MAX_ORDER] |
82 | */ |
83 | static void populate_freelist(const size_t start_pfn, const size_t nframes, const size_t order) |
84 | { |
85 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
86 | |
87 | const size_t step = pow2(x: order); |
88 | |
89 | pr_dinfo2(pmm_buddy, " order: %zu, step: %zu" , order, step); |
90 | |
91 | pfn_t current = start_pfn; |
92 | size_t nframes_left = nframes; |
93 | |
94 | for (; current + step <= start_pfn + nframes; current += step) |
95 | { |
96 | phyframe_t *const frame = pfn_phyframe(current); |
97 | linked_list_init(list_node(&frame->info)); |
98 | frame->state = phyframe::PHYFRAME_FREE; // free or reserved |
99 | |
100 | pr_dinfo2(pmm_buddy, " - " PFN_RANGE, current, current + step - 1); |
101 | frame->order = order; |
102 | add_to_freelist(order, frame); |
103 | nframes_left -= step; |
104 | } |
105 | |
106 | // we have some frames left over, add them to a smaller order |
107 | if (nframes_left) |
108 | populate_freelist(start_pfn: current, nframes: nframes_left, order: order - 1); |
109 | } |
110 | |
111 | static void break_this_pfn(pfn_t this_pfn, size_t this_order) |
112 | { |
113 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
114 | |
115 | phyframe_t *const frame = pfn_phyframe(this_pfn); |
116 | MOS_ASSERT(frame->state == phyframe::PHYFRAME_FREE); // must be free |
117 | list_remove(&frame->info); |
118 | |
119 | // split this frame into two frames of order-1 |
120 | const pfn_t frame2_pfn = this_pfn + pow2(x: this_order - 1); // pow2(order) / 2 |
121 | pr_dinfo2(pmm_buddy, " breaking order %zu" PFN_RANGE " -> " PFN_RANGE " and " PFN_RANGE, this_order, this_pfn, this_pfn + pow2(this_order) - 1, this_pfn, |
122 | frame2_pfn - 1, frame2_pfn, frame2_pfn + pow2(this_order - 1) - 1); |
123 | |
124 | phyframe_t *const frame2 = pfn_phyframe(frame2_pfn); |
125 | linked_list_init(list_node(&frame2->info)); // this must not be in any list, so we init it |
126 | frame2->state = frame->state; // which is PHYFRAME_FREE |
127 | |
128 | add_to_freelist(order: this_order - 1, frame); |
129 | add_to_freelist(order: this_order - 1, frame: frame2); |
130 | } |
131 | |
132 | static void (pfn_t start, size_t nframes, enum phyframe::phyframe_state state) |
133 | { |
134 | MOS_ASSERT(spinlock_is_locked(&buddy_lock)); |
135 | |
136 | size_t last_nframes = 0; |
137 | |
138 | while (nframes) |
139 | { |
140 | if (last_nframes == nframes) |
141 | { |
142 | phyframe_t *frame = pfn_phyframe(start); |
143 | if (state == phyframe::PHYFRAME_RESERVED && frame->state == phyframe::PHYFRAME_RESERVED) |
144 | { |
145 | // XXX: A great hack, there may be overlapped reserved ranges. |
146 | MOS_ASSERT(frame->order == 0); |
147 | start++; |
148 | nframes--; |
149 | } |
150 | else |
151 | { |
152 | mos_panic("infinite loop detected" ); |
153 | } |
154 | } |
155 | |
156 | last_nframes = nframes; |
157 | |
158 | MOS_ASSERT_X(start <= pmm_total_frames, "insane!" ); |
159 | pr_dinfo2(pmm_buddy, " extracting, n left: %zu, start: " PFN_FMT, nframes, start); |
160 | |
161 | for (size_t order = max_order; order != (size_t) -1; order--) |
162 | { |
163 | // find if the freelist of this order contains something [start] |
164 | list_head *freelist = &buddy.freelists[order]; |
165 | if (list_is_empty(head: freelist)) |
166 | continue; // no, the frame must be at a lower order |
167 | |
168 | list_foreach(phyframe_t::additional_info, info, *freelist) |
169 | { |
170 | const auto f = container_of(info, phyframe_t, info); |
171 | const pfn_t start_pfn = phyframe_pfn(f); |
172 | const pfn_t end_pfn = start_pfn + pow2(x: order) - 1; |
173 | |
174 | if (start_pfn == start) |
175 | { |
176 | // so, we found a set of frame that starts with [start], here are the cases: |
177 | // - pow2(order) < nframes: we not only need this frame, but also some more frames |
178 | // - pow2(order) = nframes: we need this frame, and we are done |
179 | // - pow2(order) > nframes: we need this frame, but we need to break it into two smaller frames so that |
180 | // in the next iteration, a more precise subset of this frame can be found |
181 | |
182 | pr_dinfo2(pmm_buddy, " found a frame that starts with " PFN_FMT "..." , start); |
183 | if (pow2(x: order) <= nframes) |
184 | { |
185 | list_remove(&f->info); |
186 | f->state = state; |
187 | f->order = 0; |
188 | |
189 | nframes -= pow2(x: order); |
190 | start += pow2(x: order); |
191 | |
192 | pr_dinfo2(pmm_buddy, " done, n left: %zu, start: " PFN_FMT, nframes, start); |
193 | break; // we're done with the current order |
194 | } |
195 | else |
196 | { |
197 | pr_dinfo2(pmm_buddy, " narrowing down..." ); |
198 | // break this frame into two smaller frames, so that in the next iteration |
199 | // we can find a frame that exactly ends with [start + nframes - 1] |
200 | break_this_pfn(this_pfn: start_pfn, this_order: order); |
201 | break; |
202 | } |
203 | } |
204 | |
205 | if (start_pfn <= start && end_pfn >= start) |
206 | { |
207 | pr_dinfo2(pmm_buddy, " found a frame that contains " PFN_FMT, start); |
208 | // we found a frame that contains [start] |
209 | // so we will break it into two frames, |
210 | // - one of which contains [start], and |
211 | // - the other contains [start + 1, end_pfn] |
212 | |
213 | // the lower order frames will be broken down in the next iteration of this loop |
214 | // until we have a frame that exactly starts with [start] |
215 | break_this_pfn(this_pfn: start_pfn, this_order: order); |
216 | break; |
217 | } |
218 | } |
219 | |
220 | if (nframes == 0) |
221 | break; // fast exit path |
222 | } |
223 | } |
224 | } |
225 | |
226 | static void break_the_order(const size_t order) |
227 | { |
228 | if (order > orders[MOS_ARRAY_SIZE(orders) - 1]) |
229 | return; // we can't break any further |
230 | |
231 | const list_head *freelist = &buddy.freelists[order]; |
232 | if (list_is_empty(head: freelist)) |
233 | break_the_order(order: order + 1); |
234 | |
235 | if (list_is_empty(head: freelist)) |
236 | { |
237 | pr_dinfo2(pmm_buddy, " no free frames of order %zu, can't break" , order); |
238 | return; // out of memory! |
239 | } |
240 | |
241 | const auto info = list_entry(freelist->next, phyframe_t::additional_info); |
242 | const auto frame = container_of(info, phyframe_t, info); |
243 | MOS_ASSERT(frame->state == phyframe::PHYFRAME_FREE); |
244 | list_remove(info); |
245 | |
246 | const pfn_t frame_pfn = phyframe_pfn(frame); |
247 | const pfn_t frame2_pfn = frame_pfn + pow2(x: order - 1); // pow2(order) / 2 |
248 | |
249 | pr_dinfo2(pmm_buddy, " breaking order %3zu, " PFN_RANGE " -> " PFN_RANGE " and " PFN_RANGE, order, frame_pfn, frame_pfn + pow2(order) - 1, frame_pfn, frame2_pfn - 1, |
250 | frame2_pfn, frame2_pfn + pow2(order - 1) - 1); |
251 | |
252 | phyframe_t *const frame2 = pfn_phyframe(frame2_pfn); |
253 | linked_list_init(list_node(&frame2->info)); |
254 | frame2->state = phyframe::PHYFRAME_FREE; |
255 | |
256 | add_to_freelist(order: order - 1, frame); |
257 | add_to_freelist(order: order - 1, frame: frame2); |
258 | } |
259 | |
260 | /** |
261 | * @brief Try finding a buddy frame and merging it with the given frame |
262 | * |
263 | * @param pfn physical frame number |
264 | * @param order order of the frame, given by log2(nframes) |
265 | * @return true if a buddy was found and merged to a higher order freelist |
266 | * @return false if... |
267 | */ |
268 | [[nodiscard]] static bool try_merge(const pfn_t pfn, u8 order) |
269 | { |
270 | if (order > orders[MOS_ARRAY_SIZE(orders) - 1]) |
271 | { |
272 | pr_dinfo2(pmm_buddy, " order %hhu is too large, cannot merge" , order); |
273 | return false; |
274 | } |
275 | |
276 | phyframe_t *const frame = pfn_phyframe(pfn); |
277 | |
278 | const pfn_t buddy_pfn = get_buddy_pfn(page_pfn: pfn, order); |
279 | if (buddy_pfn >= pmm_total_frames) |
280 | return false; |
281 | |
282 | phyframe_t *const buddy = pfn_phyframe(buddy_pfn); |
283 | if (buddy->state != phyframe::PHYFRAME_FREE) |
284 | { |
285 | pr_dinfo2(pmm_buddy, " buddy pfn " PFN_FMT " is not free for pfn " PFN_FMT ", not merging" , buddy_pfn, pfn); |
286 | return false; |
287 | } |
288 | |
289 | if (buddy->order != order) |
290 | { |
291 | pr_dinfo2(pmm_buddy, " buddy pfn " PFN_FMT " is not the same order (%hhu != %hhu) as " PFN_FMT ", not merging" , buddy_pfn, buddy->order, order, pfn); |
292 | return false; |
293 | } |
294 | |
295 | list_remove(&buddy->info); |
296 | frame->state = phyframe::PHYFRAME_FREE; |
297 | |
298 | pr_dinfo2(pmm_buddy, " merging order %hhu, " PFN_RANGE " and " PFN_RANGE, order, pfn, pfn + pow2(order) - 1, buddy_pfn, buddy_pfn + pow2(order) - 1); |
299 | |
300 | const size_t high_order_pfn = std::min(a: pfn, b: buddy_pfn); // the lower pfn |
301 | |
302 | if (!try_merge(pfn: high_order_pfn, order: order + 1)) |
303 | { |
304 | phyframe_t *const high_order_frame = pfn_phyframe(high_order_pfn); |
305 | linked_list_init(list_node(&high_order_frame->info)); |
306 | high_order_frame->state = phyframe::PHYFRAME_FREE; |
307 | add_to_freelist(order: order + 1, frame: high_order_frame); // use the lower pfn |
308 | } |
309 | return true; |
310 | } |
311 | |
312 | void buddy_dump_all() |
313 | { |
314 | for (size_t i = 0; i < MOS_ARRAY_SIZE(buddy.freelists); i++) |
315 | dump_list(order: i); |
316 | |
317 | pr_info("" ); |
318 | } |
319 | |
320 | void buddy_init(size_t max_nframes) |
321 | { |
322 | spinlock_acquire(&buddy_lock); |
323 | for (size_t i = 0; i < MOS_ARRAY_SIZE(buddy.freelists); i++) |
324 | { |
325 | pr_dinfo2(pmm_buddy, "init freelist[%zu], order: %zu" , i, orders[i]); |
326 | linked_list_init(head_node: &buddy.freelists[i]); |
327 | } |
328 | |
329 | const size_t order = std::min(a: log2(x: max_nframes), b: orders[MOS_ARRAY_SIZE(orders) - 1]); |
330 | populate_freelist(start_pfn: 0, nframes: max_nframes, order); |
331 | spinlock_release(&buddy_lock); |
332 | } |
333 | |
334 | void buddy_reserve_n(pfn_t pfn, size_t nframes) |
335 | { |
336 | spinlock_acquire(&buddy_lock); |
337 | pr_dinfo2(pmm_buddy, "reserving " PFN_RANGE " (%zu frames)" , pfn, pfn + nframes - 1, nframes); |
338 | extract_exact_range(start: pfn, nframes, state: phyframe::PHYFRAME_RESERVED); |
339 | spinlock_release(&buddy_lock); |
340 | } |
341 | |
342 | phyframe_t *buddy_alloc_n_exact(size_t nframes) |
343 | { |
344 | spinlock_acquire(&buddy_lock); |
345 | const size_t order = log2_ceil(nframes); |
346 | |
347 | // check if this order is too large |
348 | if (order > orders[MOS_ARRAY_SIZE(orders) - 1]) |
349 | return NULL; |
350 | |
351 | pr_dinfo2(pmm_buddy, "allocating %zu contiguous frames (order %zu, which is %zu frames, wasting %zu frames)" , nframes, order, pow2(order), pow2(order) - nframes); |
352 | |
353 | list_head *free = &buddy.freelists[order]; |
354 | if (list_is_empty(head: free)) |
355 | break_the_order(order: order + 1); |
356 | |
357 | if (unlikely(list_is_empty(free))) |
358 | { |
359 | spinlock_release(&buddy_lock); |
360 | pr_emerg("no free frames of order %zu, can't break" , order); |
361 | pr_emerg("out of memory!" ); |
362 | return NULL; // out of memory! |
363 | } |
364 | |
365 | const auto frame = container_of(list_entry(free->next, phyframe_t::additional_info), phyframe_t, info); |
366 | const pfn_t start = phyframe_pfn(frame); |
367 | |
368 | extract_exact_range(start, nframes, state: phyframe::PHYFRAME_ALLOCATED); // extract the exact range from the buddy.freelists |
369 | |
370 | for (size_t i = 0; i < nframes; i++) |
371 | { |
372 | phyframe_t *const f = pfn_phyframe(start + i); |
373 | f->state = phyframe::PHYFRAME_ALLOCATED; |
374 | f->order = 0; // so that they can be freed individually |
375 | } |
376 | |
377 | spinlock_release(&buddy_lock); |
378 | return frame; |
379 | } |
380 | |
381 | void buddy_free_n(pfn_t pfn, size_t nframes) |
382 | { |
383 | pr_dinfo2(pmm_buddy, "freeing " PFN_RANGE " (%zu frames)" , pfn, pfn + nframes - 1, nframes); |
384 | spinlock_acquire(&buddy_lock); |
385 | |
386 | phyframe_t *const frame = pfn_phyframe(pfn); |
387 | MOS_ASSERT_X(frame->state == phyframe::PHYFRAME_ALLOCATED, "frame must be allocated" ); |
388 | MOS_ASSERT(list_is_empty(&frame->info.list_node)); |
389 | frame->state = phyframe::PHYFRAME_FREE; |
390 | |
391 | const size_t order = log2_ceil(nframes); |
392 | if (!try_merge(pfn, order)) |
393 | add_to_freelist(order, frame); |
394 | |
395 | spinlock_release(&buddy_lock); |
396 | } |
397 | |