/*
 * Copyright 2025, Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Augustin Cavalier
 */
#include "PagesAllocator.h"

#include <new>

#include <errno.h>
#include <string.h>
#include <sys/mman.h>

#include <OS.h>

#include <locks.h>
#include <syscalls.h>
#include <util/SplayTree.h>


/*!	The local page size. Must be a multiple of the system page size. */
static const size_t kPageSize = B_PAGE_SIZE;

/*!	The "largest useful" chunk size: any allocations larger than this
 * will get their own area, rather than sharing the common one(s). */
static const size_t kLargestUsefulChunk = 512 * kPageSize;

/*!	Amount of virtual address space to reserve when creating new areas. */
/* (The Haiku kernel will ignore reservations if there's no other address
 * space left, so using a large value here should not hurt.) */
static const size_t kReserveAddressSpace = 1 * 1024 * 1024 * 1024;

/*!	Cache up to this many percentage points of free memory (compared to used.) */
static const size_t kFreePercentage = 25;

/*!	Always forbid more free memory than this, even if it's below kFreePercentage. */
static const size_t kFreeMaximum = 128 * 1024 * 1024;

/*!	Always allow this much free memory, even if it's above kFreePercentage. */
static const size_t kFreeMinimum = 128 * kPageSize;

/*!	When a free chunk encompasses a whole area and is smaller than this, unmap it. */
static const size_t kMinimumFreeAreaSize = 16 * kPageSize;


namespace {

class PagesAllocator {
	struct FreeChunk;

public:
	PagesAllocator()
	{
		mutex_init(&fLock, "PagesAllocator lock");
		fUsed = fFree = 0;
		fLastArea = -1;
	}

	~PagesAllocator()
	{
	}

	void BeforeFork()
	{
		mutex_lock(&fLock);
	}

	void AfterFork(bool parent)
	{
		if (parent) {
			mutex_unlock(&fLock);
		} else {
			if (fLastArea >= 0)
				fLastArea = area_for((void*)(fLastAreaTop - 1));
			mutex_init(&fLock, "PagesAllocator lock");
		}
	}

	status_t AllocatePages(size_t allocate, void*& address, bool& cleared)
	{
		MutexLocker locker(fLock);
		if (allocate > kLargestUsefulChunk) {
			// Create an area just for this allocation.
			locker.Unlock();

			area_id area = create_area("heap large allocation", &address,
				B_ANY_ADDRESS, allocate, B_NO_LOCK, B_READ_AREA | B_WRITE_AREA);
			if (area >= 0) {
				locker.Lock();
				fUsed += allocate;
				cleared = true;
				return B_OK;
			}
			return area;
		}

		if (fFree >= allocate) {
			// Try to use memory from the cache.
			FreeChunk* chunk = fChunksBySizeTree.FindClosest(allocate, true, true);
			if (chunk != NULL) {
				address = _Use(chunk, allocate);
				cleared = false;
				return B_OK;
			}
		}

		// Not enough memory in the cache. Allocate some more.
		FreeChunk* chunk;
		bool mostlyClear = false;
		status_t status = _Map(allocate, chunk, mostlyClear);
		if (status != B_OK)
			return status;

		address = _Use(chunk, allocate);
		cleared = mostlyClear;
		return B_OK;
	}

	status_t AllocatePagesAt(void* _address, size_t allocate, bool& cleared)
	{
		const addr_t address = (addr_t)_address;

		MutexLocker locker(fLock);
		if (allocate <= kLargestUsefulChunk) {
			FreeChunk* chunk = fChunksByAddressTree.FindClosest(address, false, true);
			if (chunk != NULL && chunk->NextAddress() > address) {
				// The address is in a free chunk.
				size_t remainingAfter = chunk->size - (address - (addr_t)chunk);
				if (remainingAfter < allocate) {
					if (chunk->NextAddress() != fLastAreaTop)
						return B_NO_MEMORY;

					size_t add = allocate - remainingAfter;
					status_t status = _ResizeLastArea(add);
					if (status != B_OK)
						return status;

					chunk = _Insert((void*)chunk->NextAddress(), add);
				}

				// Cut the beginning?
				if (address == (addr_t)chunk) {
					_Use(chunk, allocate);
					cleared = false;
					return B_OK;
				}

				// Why would anyone want to allocate such a specific address, anyway?
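				// At this point the requested address lies strictly inside the free
				// chunk, so satisfying it would require splitting the chunk in two.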
				debugger("PagesAllocator: middle-chunk allocation not implemented!");
				return B_ERROR;
			}
		}

		if (address == fLastAreaTop) {
			if (allocate > kLargestUsefulChunk)
				return B_NO_MEMORY;

			status_t status = _ResizeLastArea(allocate);
			if (status == B_OK) {
				fUsed += allocate;
				cleared = true;
				return B_OK;
			}
			return status;
		}
		if (address >= (fLastAreaTop - fLastAreaSize) && address < fLastAreaTop) {
			// We didn't find a matching free chunk, so that isn't going to work.
			return EEXIST;
		}

		locker.Unlock();

		// One last try: see if we can resize the area this belongs to.
		area_info info;
		info.area = area_for((void*)(address - 1));
		if (info.area < 0)
			return info.area;

		status_t status = get_area_info(info.area, &info);
		if (status != B_OK)
			return status;
		if (((addr_t)info.address + info.size) != address)
			return B_NO_MEMORY;

		status = resize_area(info.area, info.size + allocate);
		if (status == B_OK) {
			locker.Lock();
			fUsed += allocate;
			cleared = true;
			return B_OK;
		}

		// TODO: We could add a "resize allowing move" feature to resize_area,
		// which would avoid having to memcpy() the data of large allocations.
		return status;
	}

	status_t FreePages(void* _address, size_t size)
	{
		MutexLocker locker(fLock);
		if (size > fUsed)
			debugger("PagesAllocator: request to free more than allocated");
		fUsed -= size;

		FreeChunk* freedChunk = _Insert(_address, size);

		if (size > kLargestUsefulChunk) {
			// TODO: This doesn't deal with a free of a smaller number of pages
			// within a "large allocation" area. We should probably free those
			// immediately also, or at least mark them specially in the trees
			// (so they don't get reused for anything but the large allocation.)
			locker.Detach();
			return _UnlockingRemoveAndUnmap(freedChunk, false);
		}

		if (!_InLastArea(freedChunk) && freedChunk->size < kMinimumFreeAreaSize) {
			// If this chunk covers a whole area, or is at the head or tail of one
			// that's smaller than kMinimumFreeAreaSize, we unmap it immediately.
			// TODO: Find a way to avoid invoking syscalls here, at least in some cases?
			area_info info = {};
			get_area_info(area_for(freedChunk), &info);
			if (info.size == size || (info.size < kMinimumFreeAreaSize
					&& (info.address == freedChunk
						|| ((addr_t)info.address + info.size) == freedChunk->NextAddress()))) {
				locker.Detach();
				return _UnlockingRemoveAndUnmap(freedChunk, false);
			}
		}

		if (fFree <= _FreeLimit()) {
			// TODO: If not decommitting/unmapping, we might free the pages
			// using MADV_FREE (perhaps on medium-sized chunks with others
			// of equal sizes already present in the tree, at least.)
			return B_OK;
		}

		while (fFree > _FreeLimit()) {
			FreeChunk* chunk = fChunksBySizeTree.FindMax();

			// Try to avoid unmapping the just-freed chunk, or any in the last area
			// besides the topmost.
			while (chunk == freedChunk || _InLastArea(chunk)) {
				if (chunk->NextAddress() == fLastAreaTop)
					break;

				FreeChunk* previousChunk = fChunksBySizeTree.PreviousDontSplay(chunk);
				if (previousChunk == NULL) {
					// Just free the maximum-size chunk, whatever it was.
					chunk = fChunksBySizeTree.FindMax();
					break;
				}
				chunk = previousChunk;
			}

			status_t status = _UnlockingRemoveAndUnmap(chunk);
			if (status != B_OK)
				return status;
		}
		return B_OK;
	}

private:
	void* _Use(FreeChunk* chunk, size_t amount)
	{
		fChunksBySizeTree.Remove(chunk);
		fChunksByAddressTree.Remove(chunk);

		if (chunk->size == amount) {
			// The whole chunk will be used.
			// Nothing special to do in this case.
		} else {
			// Some will be left over.
			// Break the remainder off and reinsert into the trees.
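			// The remainder's FreeChunk header is written in place at the split
			// address; the caller's allocation starts at the head of the chunk.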
			FreeChunk* newChunk = (FreeChunk*)((addr_t)chunk + amount);
			newChunk->size = (chunk->size - amount);
			fChunksBySizeTree.Insert(newChunk);
			fChunksByAddressTree.Insert(newChunk);
		}

		fUsed += amount;
		fFree -= amount;
		memset(chunk, 0, sizeof(FreeChunk));
		return (void*)chunk;
	}

	FreeChunk* _Insert(void* _address, size_t size)
	{
		fFree += size;

		const addr_t address = (addr_t)_address;
		FreeChunk* chunk;

		FreeChunk* preceding = fChunksByAddressTree.FindClosest(address, false, false);
		if (preceding != NULL && preceding->NextAddress() == address) {
			fChunksBySizeTree.Remove(preceding);
			chunk = preceding;
			chunk->size += size;
		} else {
			chunk = (FreeChunk*)_address;
			chunk->size = size;
			fChunksByAddressTree.Insert(chunk);
		}

		FreeChunk* following = chunk->address_tree_list_link;
		if (following != NULL && chunk->NextAddress() == (addr_t)following) {
			fChunksBySizeTree.Remove(following);
			fChunksByAddressTree.Remove(following);
			chunk->size += following->size;
		}

		fChunksBySizeTree.Insert(chunk);
		return chunk;
	}

	size_t _FreeLimit() const
	{
		size_t freeLimit = ((fUsed * kFreePercentage) / 100);
		if (freeLimit < kFreeMinimum)
			freeLimit = kFreeMinimum;
		else if (freeLimit > kFreeMaximum)
			freeLimit = kFreeMaximum;
		else
			freeLimit = (freeLimit + (kPageSize - 1)) & ~(kPageSize - 1);
		return freeLimit;
	}

private:
	status_t _Map(size_t allocate, FreeChunk*& allocated, bool& mostlyClear)
	{
		if (fLastArea >= 0) {
			addr_t oldTop = fLastAreaTop;
			status_t status = _ResizeLastArea(allocate);
			if (status == B_OK) {
				allocated = _Insert((void*)oldTop, allocate);
				mostlyClear = (allocated == (void*)oldTop);
				return B_OK;
			}
		}

		// Create a new area.
		// TODO: We could use an inner lock here to avoid contention.
		addr_t newAreaBase;
		status_t status = _kern_reserve_address_range(&newAreaBase,
			B_RANDOMIZED_ANY_ADDRESS, kReserveAddressSpace);
		size_t newReservation = (status == B_OK) ? kReserveAddressSpace : 0;

		status = create_area("heap area", (void**)&newAreaBase,
			(status == B_OK) ? B_EXACT_ADDRESS : B_RANDOMIZED_ANY_ADDRESS,
			allocate, B_NO_LOCK, B_READ_AREA | B_WRITE_AREA);
		if (status < B_OK)
			return status;

		if (fLastAreaReservedTop > fLastAreaTop) {
			_kern_unreserve_address_range(fLastAreaTop,
				fLastAreaReservedTop - fLastAreaTop);
		}

		fLastArea = status;
		fLastAreaTop = newAreaBase + allocate;
		fLastAreaSize = allocate;
		fLastAreaReservedTop = newAreaBase + newReservation;

		allocated = _Insert((void*)newAreaBase, allocate);
		mostlyClear = (allocated == (void*)newAreaBase);
		return B_OK;
	}

	bool _InLastArea(const FreeChunk* chunk) const
	{
		return ((addr_t)chunk < fLastAreaTop
			&& (addr_t)chunk >= (fLastAreaTop - fLastAreaSize));
	}

	status_t _ResizeLastArea(size_t amount)
	{
		// TODO: We could use an inner lock here to avoid contention.
		status_t status = resize_area(fLastArea, fLastAreaSize + amount);
		if (status == B_OK) {
			fLastAreaTop += amount;
			fLastAreaSize += amount;
		}
		return status;
	}

	status_t _UnlockingRemoveAndUnmap(FreeChunk* chunk, bool relock = true)
	{
		fChunksByAddressTree.Remove(chunk);
		fChunksBySizeTree.Remove(chunk);
		fFree -= chunk->size;

		status_t status = _UnlockingUnmap(chunk);
		if (relock)
			mutex_lock(&fLock);

		if (status != B_OK) {
			if (!relock)
				mutex_lock(&fLock);

			// Unmap failed; reinsert the chunk.
			fChunksByAddressTree.Insert(chunk);
			fChunksBySizeTree.Insert(chunk);
			fFree += chunk->size;

			if (!relock)
				mutex_unlock(&fLock);
		}
		return status;
	}

	status_t _UnlockingUnmap(FreeChunk* chunk)
	{
		// TODO: We could use an inner lock here to avoid contention.
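		// The caller holds fLock; adopt it here so it is released either
		// explicitly (before unmapping memory outside the last area) or
		// automatically when this method returns.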
		MutexLocker locker(fLock, true);

		const size_t size = chunk->size;
		const addr_t address = (addr_t)chunk;
		const addr_t top = address + size;
		const addr_t lastAreaBase = (fLastAreaTop - fLastAreaSize);
		if (address <= lastAreaBase && top >= fLastAreaTop) {
			// The whole area is being deleted.
			const addr_t lastTop = fLastAreaTop, reservedTop = fLastAreaReservedTop;
			fLastArea = -1;
			fLastAreaTop = fLastAreaSize = fLastAreaReservedTop = 0;
			locker.Unlock();

			if (reservedTop > lastTop)
				_kern_unreserve_address_range(lastTop, reservedTop - lastTop);
		} else if (top == fLastAreaTop) {
			// Shrink the top.
			status_t status = resize_area(fLastArea, fLastAreaSize - size);
			if (status != B_OK)
				return status;

			fLastAreaSize -= size;
			fLastAreaTop -= size;
			return B_OK;
		} else if (address == lastAreaBase) {
			// Shrink the bottom.
			if (munmap(chunk, size) != 0)
				return errno;

			fLastAreaSize -= size;
			return B_OK;
		} else if (address >= lastAreaBase && address < fLastAreaTop) {
			// Cut the middle and get the new ID.
			if (munmap(chunk, size) != 0)
				return errno;

			fLastAreaSize = fLastAreaTop - top;
			fLastArea = area_for((void*)(fLastAreaTop - 1));
			return B_OK;
		} else {
			// Not in the last area.
			locker.Unlock();
		}

		if (munmap(chunk, size) != 0)
			return errno;
		return B_OK;
	}

private:
	struct FreeChunk {
		SplayTreeLink address_tree_link;
		SplayTreeLink size_tree_link;
		FreeChunk* address_tree_list_link;
		FreeChunk* size_tree_list_link;

		size_t size;

	public:
		inline addr_t NextAddress() const
		{
			return ((addr_t)this + size);
		}
	};

	struct ChunksByAddressTreeDefinition {
		typedef addr_t KeyType;
		typedef FreeChunk NodeType;

		static addr_t GetKey(const FreeChunk* node)
		{
			return (addr_t)node;
		}

		static SplayTreeLink* GetLink(FreeChunk* node)
		{
			return &node->address_tree_link;
		}

		static int Compare(const addr_t& key, const FreeChunk* node)
		{
			if (key == (addr_t)node)
				return 0;
			return (key < (addr_t)node) ? -1 : 1;
		}

		static FreeChunk** GetListLink(FreeChunk* node)
		{
			return &node->address_tree_list_link;
		}
	};
	typedef IteratableSplayTree<ChunksByAddressTreeDefinition> ChunksByAddressTree;

	struct ChunksBySizeTreeDefinition {
		struct KeyType {
			size_t size;
			addr_t address;

		public:
			KeyType(size_t _size)
				: size(_size), address(0) {}

			KeyType(const FreeChunk* chunk)
				: size(chunk->size), address((addr_t)chunk) {}
		};
		typedef FreeChunk NodeType;

		static KeyType GetKey(const FreeChunk* node)
		{
			return KeyType(node);
		}

		static SplayTreeLink* GetLink(FreeChunk* node)
		{
			return &node->size_tree_link;
		}

		static int Compare(const KeyType& key, const FreeChunk* node)
		{
			if (key.size == node->size)
				return ChunksByAddressTreeDefinition::Compare(key.address, node);
			return (key.size < node->size) ? -1 : 1;
		}
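		// Compare() breaks ties on size by address, so the size tree has a
		// total order and equal-sized chunks can coexist in it.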
		static FreeChunk** GetListLink(FreeChunk* node)
		{
			return &node->size_tree_list_link;
		}
	};
	typedef IteratableSplayTree<ChunksBySizeTreeDefinition> ChunksBySizeTree;

private:
	mutex fLock;

	size_t fUsed;
	size_t fFree;

	ChunksByAddressTree fChunksByAddressTree;
	ChunksBySizeTree fChunksBySizeTree;

	area_id fLastArea;
	addr_t fLastAreaTop;
	size_t fLastAreaSize;
	size_t fLastAreaReservedTop;
};

} // namespace


static char sPagesAllocatorStorage[sizeof(PagesAllocator)]
#if defined(__GNUC__) && __GNUC__ >= 4
	__attribute__((__aligned__(alignof(PagesAllocator))))
#endif
	;
static PagesAllocator* sPagesAllocator;


void
__init_pages_allocator()
{
	sPagesAllocator = new(sPagesAllocatorStorage) PagesAllocator;
}


void
__pages_allocator_before_fork()
{
	sPagesAllocator->BeforeFork();
}


void
__pages_allocator_after_fork(int parent)
{
	sPagesAllocator->AfterFork(parent);
}


status_t
__allocate_pages(void** address, size_t length, int flags, uint8* cleared)
{
	if ((length % kPageSize) != 0)
		debugger("PagesAllocator: incorrectly sized allocate");

	if ((flags & MAP_FIXED) != 0)
		return sPagesAllocator->AllocatePagesAt(*address, length, *(bool*)cleared);

	//fprintf(stderr, "AllocatePages! 0x%x\n", (int)length);
	return sPagesAllocator->AllocatePages(length, *address, *(bool*)cleared);
}


status_t
__free_pages(void* address, size_t length)
{
	if ((length % kPageSize) != 0)
		debugger("PagesAllocator: incorrectly sized free");

	//fprintf(stderr, "FreePages! %p, 0x%x\n", address, (int)length);
	sPagesAllocator->FreePages(address, length);

	// Errors returned from FreePages() indicate problems unmapping memory;
	// in case of failure the pages will always be added to the free lists.
	// So we never tell callers about failures here.
	return B_OK;
}