Merge #12048: Use best-fit strategy in Arena, now O(log(n)) instead O(n)

5fbf7c4 fix nits: variable naming, typos (Martin Ankerl)
1e0ee90 Use best-fit strategy in Arena, now O(log(n)) instead O(n) (Martin Ankerl)

Pull request description:

  This replaces the first-fit algorithm used in the Arena with a best-fit. According to "Dynamic Storage Allocation: A Survey and Critical Review", Wilson et al., 1995, http://www.scs.stanford.edu/14wi-cs140/sched/readings/wilson.pdf, both strategies work well in practice.

  The advantage of using best-fit is that we can switch the O(n) allocation to O(log(n)). Additionally, some previously O(log(n)) operations are now O(1) operations by using hash maps. The end effect is that the benchmark runs about 2.5 times faster on my machine:

      # Benchmark, evals, iterations, total, min, max, median
      old: BenchLockedPool, 5, 530, 5.25749, 0.00196938, 0.00199755, 0.00198172
      new: BenchLockedPool, 5, 1300, 5.11313, 0.000781493, 0.000793314, 0.00078606

  I've run all unit tests and benchmarks, and increased the number of iterations so that BenchLockedPool takes about 5 seconds again.

Tree-SHA512: 6551e384671f93f10c60df530a29a1954bd265cc305411f665a8756525e5afe2873a8032c797d00b6e8c07e16d9827465d0b662875433147381474a44119ccce
This commit is contained in:
Wladimir J. van der Laan 2018-03-22 14:28:19 +01:00 committed by pasta
parent 4e17167c2d
commit 162bba0a6c
No known key found for this signature in database
GPG Key ID: 52527BEDABE87984
3 changed files with 60 additions and 31 deletions

View File

@@ -43,4 +43,4 @@ static void BenchLockedPool(benchmark::State& state)
addr.clear(); addr.clear();
} }
BENCHMARK(BenchLockedPool, 530); BENCHMARK(BenchLockedPool, 1300);

View File

@@ -47,7 +47,9 @@ Arena::Arena(void *base_in, size_t size_in, size_t alignment_in):
base(static_cast<char*>(base_in)), end(static_cast<char*>(base_in) + size_in), alignment(alignment_in) base(static_cast<char*>(base_in)), end(static_cast<char*>(base_in) + size_in), alignment(alignment_in)
{ {
// Start with one free chunk that covers the entire arena // Start with one free chunk that covers the entire arena
chunks_free.emplace(base, size_in); auto it = size_to_free_chunk.emplace(size_in, base);
chunks_free.emplace(base, it);
chunks_free_end.emplace(base + size_in, it);
} }
Arena::~Arena() Arena::~Arena()
@@ -63,26 +65,30 @@ void* Arena::alloc(size_t size)
if (size == 0) if (size == 0)
return nullptr; return nullptr;
// Pick a large enough free-chunk // Pick a large enough free-chunk. Returns an iterator pointing to the first element that is not less than key.
auto it = std::find_if(chunks_free.begin(), chunks_free.end(), // This allocation strategy is best-fit. According to "Dynamic Storage Allocation: A Survey and Critical Review",
[=](const std::map<char*, size_t>::value_type& chunk){ return chunk.second >= size; }); // Wilson et. al. 1995, http://www.scs.stanford.edu/14wi-cs140/sched/readings/wilson.pdf, best-fit and first-fit
if (it == chunks_free.end()) // policies seem to work well in practice.
auto size_ptr_it = size_to_free_chunk.lower_bound(size);
if (size_ptr_it == size_to_free_chunk.end())
return nullptr; return nullptr;
// Create the used-chunk, taking its space from the end of the free-chunk // Create the used-chunk, taking its space from the end of the free-chunk
auto alloced = chunks_used.emplace(it->first + it->second - size, size).first; const size_t size_remaining = size_ptr_it->first - size;
if (!(it->second -= size)) auto alloced = chunks_used.emplace(size_ptr_it->second + size_remaining, size).first;
chunks_free.erase(it); chunks_free_end.erase(size_ptr_it->second + size_ptr_it->first);
return reinterpret_cast<void*>(alloced->first); if (size_ptr_it->first == size) {
} // whole chunk is used up
chunks_free.erase(size_ptr_it->second);
/* extend the Iterator if other begins at its end */ } else {
template <class Iterator, class Pair> bool extend(Iterator it, const Pair& other) { // still some memory left in the chunk
if (it->first + it->second == other.first) { auto it_remaining = size_to_free_chunk.emplace(size_remaining, size_ptr_it->second);
it->second += other.second; chunks_free[size_ptr_it->second] = it_remaining;
return true; chunks_free_end.emplace(size_ptr_it->second + size_remaining, it_remaining);
} }
return false; size_to_free_chunk.erase(size_ptr_it);
return reinterpret_cast<void*>(alloced->first);
} }
void Arena::free(void *ptr) void Arena::free(void *ptr)
@@ -97,16 +103,30 @@ void Arena::free(void *ptr)
if (i == chunks_used.end()) { if (i == chunks_used.end()) {
throw std::runtime_error("Arena: invalid or double free"); throw std::runtime_error("Arena: invalid or double free");
} }
auto freed = *i; std::pair<char*, size_t> freed = *i;
chunks_used.erase(i); chunks_used.erase(i);
// Add space to free map, coalescing contiguous chunks // coalesce freed with previous chunk
auto next = chunks_free.upper_bound(freed.first); auto prev = chunks_free_end.find(freed.first);
auto prev = (next == chunks_free.begin()) ? chunks_free.end() : std::prev(next); if (prev != chunks_free_end.end()) {
if (prev == chunks_free.end() || !extend(prev, freed)) freed.first -= prev->second->first;
prev = chunks_free.emplace_hint(next, freed); freed.second += prev->second->first;
if (next != chunks_free.end() && extend(prev, *next)) size_to_free_chunk.erase(prev->second);
chunks_free_end.erase(prev);
}
// coalesce freed with chunk after freed
auto next = chunks_free.find(freed.first + freed.second);
if (next != chunks_free.end()) {
freed.second += next->second->first;
size_to_free_chunk.erase(next->second);
chunks_free.erase(next); chunks_free.erase(next);
}
// Add/set space with coalesced free chunk
auto it = size_to_free_chunk.emplace(freed.second, freed.first);
chunks_free[freed.first] = it;
chunks_free_end[freed.first + freed.second] = it;
} }
Arena::Stats Arena::stats() const Arena::Stats Arena::stats() const
@@ -115,7 +135,7 @@ Arena::Stats Arena::stats() const
for (const auto& chunk: chunks_used) for (const auto& chunk: chunks_used)
r.used += chunk.second; r.used += chunk.second;
for (const auto& chunk: chunks_free) for (const auto& chunk: chunks_free)
r.free += chunk.second; r.free += chunk.second->first;
r.total = r.used + r.free; r.total = r.used + r.free;
return r; return r;
} }

View File

@@ -10,6 +10,7 @@
#include <map> #include <map>
#include <mutex> #include <mutex>
#include <memory> #include <memory>
#include <unordered_map>
/** /**
* OS-dependent allocation and deallocation of locked/pinned memory pages. * OS-dependent allocation and deallocation of locked/pinned memory pages.
@@ -88,11 +89,19 @@ public:
*/ */
bool addressInArena(void *ptr) const { return ptr >= base && ptr < end; } bool addressInArena(void *ptr) const { return ptr >= base && ptr < end; }
private: private:
/** Map of chunk address to chunk information. This class makes use of the typedef std::multimap<size_t, char*> SizeToChunkSortedMap;
* sorted order to merge previous and next chunks during deallocation. /** Map to enable O(log(n)) best-fit allocation, as it's sorted by size */
*/ SizeToChunkSortedMap size_to_free_chunk;
std::map<char*, size_t> chunks_free;
std::map<char*, size_t> chunks_used; typedef std::unordered_map<char*, SizeToChunkSortedMap::const_iterator> ChunkToSizeMap;
/** Map from begin of free chunk to its node in size_to_free_chunk */
ChunkToSizeMap chunks_free;
/** Map from end of free chunk to its node in size_to_free_chunk */
ChunkToSizeMap chunks_free_end;
/** Map from begin of used chunk to its size */
std::unordered_map<char*, size_t> chunks_used;
/** Base address of arena */ /** Base address of arena */
char* base; char* base;
/** End address of arena */ /** End address of arena */