2016-11-02 11:16:19 +01:00
|
|
|
// Copyright (c) 2016 The Bitcoin Core developers
|
|
|
|
// Distributed under the MIT software license, see the accompanying
|
|
|
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
|
|
|
2020-03-19 23:46:56 +01:00
|
|
|
#include <support/lockedpool.h>
|
|
|
|
#include <support/cleanse.h>
|
2016-11-02 11:16:19 +01:00
|
|
|
|
|
|
|
#if defined(HAVE_CONFIG_H)
|
2020-03-19 23:46:56 +01:00
|
|
|
#include <config/dash-config.h>
|
2016-11-02 11:16:19 +01:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef WIN32
|
|
|
|
#define WIN32_LEAN_AND_MEAN 1
|
|
|
|
#ifndef NOMINMAX
|
|
|
|
#define NOMINMAX
|
|
|
|
#endif
|
|
|
|
#include <windows.h>
|
|
|
|
#else
|
|
|
|
#include <sys/mman.h> // for mmap
|
|
|
|
#include <sys/resource.h> // for getrlimit
|
|
|
|
#include <limits.h> // for PAGESIZE
|
|
|
|
#include <unistd.h> // for sysconf
|
|
|
|
#endif
|
|
|
|
|
2016-11-07 09:21:15 +01:00
|
|
|
#include <algorithm>
|
2019-11-20 15:36:43 +01:00
|
|
|
#ifdef ARENA_DEBUG
|
|
|
|
#include <iomanip>
|
|
|
|
#include <iostream>
|
|
|
|
#endif
|
2016-11-07 09:21:15 +01:00
|
|
|
|
2019-08-06 05:08:33 +02:00
|
|
|
LockedPoolManager* LockedPoolManager::_instance = nullptr;
|
2016-11-02 11:16:19 +01:00
|
|
|
std::once_flag LockedPoolManager::init_flag;
|
|
|
|
|
|
|
|
/*******************************************************************************/
|
|
|
|
// Utilities
|
|
|
|
//
|
|
|
|
/** Round x up to the next multiple of align. align must be a power of two. */
static inline size_t align_up(size_t x, size_t align)
{
    const size_t mask = align - 1;
    return (x + mask) & ~mask;
}
|
|
|
|
|
|
|
|
/*******************************************************************************/
|
|
|
|
// Implementation: Arena
|
|
|
|
|
|
|
|
// Construct an arena over the caller-provided region [base_in, base_in + size_in).
// The arena only does bookkeeping; it does not own or release the backing memory.
Arena::Arena(void *base_in, size_t size_in, size_t alignment_in):
    base(static_cast<char*>(base_in)), end(static_cast<char*>(base_in) + size_in), alignment(alignment_in)
{
    // Start with one free chunk that covers the entire arena
    // The same chunk is indexed three ways so that alloc() can do an ordered
    // best-fit lookup by size, and free() can coalesce by begin/end address:
    //   size_to_free_chunk: size -> chunk start
    //   chunks_free:        start address -> iterator into size_to_free_chunk
    //   chunks_free_end:    end address   -> iterator into size_to_free_chunk
    auto it = size_to_free_chunk.emplace(size_in, base);
    chunks_free.emplace(base, it);
    chunks_free_end.emplace(base + size_in, it);
}
|
|
|
|
|
|
|
|
// Intentionally empty: the arena does not own its backing memory, so there
// is nothing to release here (subclasses such as LockedPageArena free it).
Arena::~Arena()
{
}
|
|
|
|
|
|
|
|
/** Allocate `size` bytes (rounded up to the arena alignment) from this arena.
 *  Returns nullptr if size is 0 or if no free chunk is large enough.
 */
void* Arena::alloc(size_t size)
{
    // Round to next multiple of alignment
    size = align_up(size, alignment);

    // Don't handle zero-sized chunks
    if (size == 0)
        return nullptr;

    // Pick a large enough free-chunk. Returns an iterator pointing to the first element that is not less than key.
    // This allocation strategy is best-fit. According to "Dynamic Storage Allocation: A Survey and Critical Review",
    // Wilson et. al. 1995, http://www.scs.stanford.edu/14wi-cs140/sched/readings/wilson.pdf, best-fit and first-fit
    // policies seem to work well in practice.
    auto size_ptr_it = size_to_free_chunk.lower_bound(size);
    if (size_ptr_it == size_to_free_chunk.end())
        return nullptr;

    // Create the used-chunk, taking its space from the end of the free-chunk
    const size_t size_remaining = size_ptr_it->first - size;
    auto allocated = chunks_used.emplace(size_ptr_it->second + size_remaining, size).first;
    // The old end address is no longer the end of a free chunk: either the
    // chunk was consumed entirely, or its tail is now the used chunk.
    chunks_free_end.erase(size_ptr_it->second + size_ptr_it->first);
    if (size_ptr_it->first == size) {
        // whole chunk is used up
        chunks_free.erase(size_ptr_it->second);
    } else {
        // still some memory left in the chunk
        // Re-index the shrunken front part of the chunk under its new size.
        auto it_remaining = size_to_free_chunk.emplace(size_remaining, size_ptr_it->second);
        chunks_free[size_ptr_it->second] = it_remaining;
        chunks_free_end.emplace(size_ptr_it->second + size_remaining, it_remaining);
    }
    // NOTE: size_ptr_it is dereferenced above, so it may only be erased here,
    // at the very end.
    size_to_free_chunk.erase(size_ptr_it);

    return reinterpret_cast<void*>(allocated->first);
}
|
|
|
|
|
|
|
|
/** Return a chunk previously obtained from alloc() to the arena, coalescing
 *  it with adjacent free chunks. Throws std::runtime_error on an address that
 *  is not a live allocation (invalid or double free). Freeing nullptr is a no-op.
 */
void Arena::free(void *ptr)
{
    // Freeing the nullptr pointer is OK.
    if (ptr == nullptr) {
        return;
    }

    // Remove chunk from used map
    auto i = chunks_used.find(static_cast<char*>(ptr));
    if (i == chunks_used.end()) {
        throw std::runtime_error("Arena: invalid or double free");
    }
    // Copy (start, size) out before erasing; `freed` is then grown in place
    // as neighbours are merged in.
    std::pair<char*, size_t> freed = *i;
    chunks_used.erase(i);

    // coalesce freed with previous chunk
    // A free chunk ending exactly where `freed` begins is merged from the left.
    auto prev = chunks_free_end.find(freed.first);
    if (prev != chunks_free_end.end()) {
        freed.first -= prev->second->first;
        freed.second += prev->second->first;
        size_to_free_chunk.erase(prev->second);
        chunks_free_end.erase(prev);
    }

    // coalesce freed with chunk after freed
    // A free chunk starting exactly where `freed` ends is merged from the right.
    auto next = chunks_free.find(freed.first + freed.second);
    if (next != chunks_free.end()) {
        freed.second += next->second->first;
        size_to_free_chunk.erase(next->second);
        chunks_free.erase(next);
    }

    // Add/set space with coalesced free chunk
    // operator[] is used deliberately: after coalescing, the begin/end keys may
    // already exist (pointing at the now-erased neighbours) and must be overwritten.
    auto it = size_to_free_chunk.emplace(freed.second, freed.first);
    chunks_free[freed.first] = it;
    chunks_free_end[freed.first + freed.second] = it;
}
|
|
|
|
|
|
|
|
/** Compute usage statistics by summing the sizes of all used and free chunks. */
Arena::Stats Arena::stats() const
{
    Arena::Stats result{0, 0, 0, chunks_used.size(), chunks_free.size()};
    for (const auto& used_chunk : chunks_used) {
        result.used += used_chunk.second;
    }
    for (const auto& free_chunk : chunks_free) {
        result.free += free_chunk.second->first;
    }
    result.total = result.used + result.free;
    return result;
}
|
|
|
|
|
|
|
|
#ifdef ARENA_DEBUG
|
2018-05-04 22:42:39 +02:00
|
|
|
/** Debug helper: dump one chunk as "<base> <size> <used>" in zero-padded hex. */
static void printchunk(void* base, size_t sz, bool used) {
    std::cout << "0x" << std::hex << std::setw(16) << std::setfill('0') << base;
    std::cout << " 0x" << std::hex << std::setw(16) << std::setfill('0') << sz;
    std::cout << " 0x" << used << std::endl;
}
|
2016-11-02 11:16:19 +01:00
|
|
|
void Arena::walk() const
|
|
|
|
{
|
2016-11-07 09:21:15 +01:00
|
|
|
for (const auto& chunk: chunks_used)
|
|
|
|
printchunk(chunk.first, chunk.second, true);
|
|
|
|
std::cout << std::endl;
|
|
|
|
for (const auto& chunk: chunks_free)
|
2019-11-20 15:36:43 +01:00
|
|
|
printchunk(chunk.first, chunk.second->first, false);
|
2016-11-02 11:16:19 +01:00
|
|
|
std::cout << std::endl;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*******************************************************************************/
|
|
|
|
// Implementation: Win32LockedPageAllocator
|
|
|
|
|
|
|
|
#ifdef WIN32
|
|
|
|
/** LockedPageAllocator specialized for Windows.
 */
class Win32LockedPageAllocator: public LockedPageAllocator
{
public:
    Win32LockedPageAllocator();
    // Allocate len bytes (rounded up to page size) with VirtualAlloc and try to
    // VirtualLock them; *lockingSuccess reports whether the lock succeeded.
    void* AllocateLocked(size_t len, bool *lockingSuccess) override;
    // Cleanse and unlock a region previously returned by AllocateLocked.
    void FreeLocked(void* addr, size_t len) override;
    // Upper bound on lockable memory; no known limit on Windows.
    size_t GetLimit() override;
private:
    size_t page_size; // System page size in bytes, cached at construction
};
|
|
|
|
|
|
|
|
// Cache the system page size once; all lengths passed to the other member
// functions are rounded up to this granularity.
Win32LockedPageAllocator::Win32LockedPageAllocator()
{
    // Determine system page size in bytes
    SYSTEM_INFO sSysInfo;
    GetSystemInfo(&sSysInfo);
    page_size = sSysInfo.dwPageSize;
}
|
|
|
|
// Allocate len bytes (page-rounded) and attempt to pin them in RAM.
// Returns nullptr when VirtualAlloc fails; *lockingSuccess is only written
// when the allocation itself succeeded.
void *Win32LockedPageAllocator::AllocateLocked(size_t len, bool *lockingSuccess)
{
    len = align_up(len, page_size);
    void *addr = VirtualAlloc(nullptr, len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    if (addr) {
        // VirtualLock is used to attempt to keep keying material out of swap. Note
        // that it does not provide this as a guarantee, but, in practice, memory
        // that has been VirtualLock'd almost never gets written to the pagefile
        // except in rare circumstances where memory is extremely low.
        *lockingSuccess = VirtualLock(const_cast<void*>(addr), len) != 0;
    }
    return addr;
}
|
|
|
|
// Wipe and unlock a region previously returned by AllocateLocked. The cleanse
// happens before unlocking so key material never sits in unpinned pages.
// NOTE(review): there is no VirtualFree here, so the virtual address range is
// never released back to the OS -- confirm whether regions are intended to
// live for the remainder of the process lifetime.
void Win32LockedPageAllocator::FreeLocked(void* addr, size_t len)
{
    len = align_up(len, page_size);
    memory_cleanse(addr, len);
    VirtualUnlock(const_cast<void*>(addr), len);
}
|
|
|
|
|
|
|
|
// Report the per-process limit on lockable memory; returning SIZE_MAX means
// "no limit known", which makes LockedPool::new_arena skip the size cap.
size_t Win32LockedPageAllocator::GetLimit()
{
    // TODO is there a limit on Windows, how to get it?
    return std::numeric_limits<size_t>::max();
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*******************************************************************************/
|
|
|
|
// Implementation: PosixLockedPageAllocator
|
|
|
|
|
|
|
|
#ifndef WIN32
|
|
|
|
/** LockedPageAllocator specialized for OSes that don't try to be
 * special snowflakes.
 */
class PosixLockedPageAllocator: public LockedPageAllocator
{
public:
    PosixLockedPageAllocator();
    // mmap len bytes (rounded up to page size) and try to mlock them;
    // *lockingSuccess reports whether the mlock succeeded.
    void* AllocateLocked(size_t len, bool *lockingSuccess) override;
    // Cleanse, munlock and munmap a region returned by AllocateLocked.
    void FreeLocked(void* addr, size_t len) override;
    // RLIMIT_MEMLOCK soft limit, or SIZE_MAX if unlimited/unavailable.
    size_t GetLimit() override;
private:
    size_t page_size; // System page size in bytes, cached at construction
};
|
|
|
|
|
|
|
|
// Cache the system page size once; all lengths passed to the other member
// functions are rounded up to this granularity.
PosixLockedPageAllocator::PosixLockedPageAllocator()
{
    // Determine system page size in bytes
#if defined(PAGESIZE) // defined in limits.h
    page_size = PAGESIZE;
#else // assume some POSIX OS
    page_size = sysconf(_SC_PAGESIZE);
#endif
}
|
2016-11-02 14:40:51 +01:00
|
|
|
|
|
|
|
// Some systems (at least OS X) do not define MAP_ANONYMOUS yet and define
|
|
|
|
// MAP_ANON which is deprecated
|
|
|
|
#ifndef MAP_ANONYMOUS
|
|
|
|
#define MAP_ANONYMOUS MAP_ANON
|
|
|
|
#endif
|
|
|
|
|
2016-11-02 11:16:19 +01:00
|
|
|
// Allocate len bytes (page-rounded) of anonymous memory and attempt to pin
// them in RAM with mlock. Returns nullptr on allocation failure;
// *lockingSuccess is only written when the mapping itself succeeded.
void *PosixLockedPageAllocator::AllocateLocked(size_t len, bool *lockingSuccess)
{
    void *addr;
    len = align_up(len, page_size);
    addr = mmap(nullptr, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    // mmap signals failure with MAP_FAILED ((void*)-1), not a null pointer;
    // translating it here keeps callers' null checks correct.
    if (addr == MAP_FAILED) {
        return nullptr;
    }
    if (addr) {
        *lockingSuccess = mlock(addr, len) == 0;
        // Best-effort: ask the kernel to exclude these pages from core dumps
        // so key material does not end up in crash dumps. Failure is ignored.
#if defined(MADV_DONTDUMP) // Linux
        madvise(addr, len, MADV_DONTDUMP);
#elif defined(MADV_NOCORE) // FreeBSD
        madvise(addr, len, MADV_NOCORE);
#endif
    }
    return addr;
}
|
|
|
|
// Wipe, unlock and unmap a region previously returned by AllocateLocked.
// The cleanse happens first so key material never sits in unpinned pages.
void PosixLockedPageAllocator::FreeLocked(void* addr, size_t len)
{
    len = align_up(len, page_size);
    memory_cleanse(addr, len);
    munlock(addr, len);
    munmap(addr, len);
}
|
|
|
|
/** Return the soft RLIMIT_MEMLOCK limit for this process, or SIZE_MAX when
 *  the limit is infinite, cannot be queried, or the platform lacks it.
 */
size_t PosixLockedPageAllocator::GetLimit()
{
#ifdef RLIMIT_MEMLOCK
    struct rlimit rlim;
    if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0 && rlim.rlim_cur != RLIM_INFINITY) {
        return rlim.rlim_cur;
    }
#endif
    return std::numeric_limits<size_t>::max();
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*******************************************************************************/
|
|
|
|
// Implementation: LockedPool
|
|
|
|
|
|
|
|
// Takes ownership of the page allocator. lf_cb (may be null) is invoked from
// new_arena() when locking a freshly allocated arena fails.
LockedPool::LockedPool(std::unique_ptr<LockedPageAllocator> allocator_in, LockingFailed_Callback lf_cb_in):
    allocator(std::move(allocator_in)), lf_cb(lf_cb_in), cumulative_bytes_locked(0)
{
}
|
|
|
|
|
|
|
|
// Intentionally empty: each LockedPageArena returns its pages through the
// allocator in its own destructor when `arenas` is destroyed.
LockedPool::~LockedPool()
{
}
|
|
|
|
void* LockedPool::alloc(size_t size)
|
|
|
|
{
|
|
|
|
std::lock_guard<std::mutex> lock(mutex);
|
2016-11-07 09:21:15 +01:00
|
|
|
|
|
|
|
// Don't handle impossible sizes
|
|
|
|
if (size == 0 || size > ARENA_SIZE)
|
|
|
|
return nullptr;
|
|
|
|
|
2016-11-02 11:16:19 +01:00
|
|
|
// Try allocating from each current arena
|
|
|
|
for (auto &arena: arenas) {
|
|
|
|
void *addr = arena.alloc(size);
|
|
|
|
if (addr) {
|
|
|
|
return addr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// If that fails, create a new one
|
|
|
|
if (new_arena(ARENA_SIZE, ARENA_ALIGN)) {
|
|
|
|
return arenas.back().alloc(size);
|
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
void LockedPool::free(void *ptr)
|
|
|
|
{
|
|
|
|
std::lock_guard<std::mutex> lock(mutex);
|
|
|
|
// TODO we can do better than this linear search by keeping a map of arena
|
|
|
|
// extents to arena, and looking up the address.
|
|
|
|
for (auto &arena: arenas) {
|
|
|
|
if (arena.addressInArena(ptr)) {
|
|
|
|
arena.free(ptr);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
throw std::runtime_error("LockedPool: invalid address not pointing to any arena");
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Aggregate the statistics of all arenas plus the pool-wide locked-byte
 *  counter. Thread-safe.
 */
LockedPool::Stats LockedPool::stats() const
{
    std::lock_guard<std::mutex> guard(mutex);
    LockedPool::Stats totals{0, 0, 0, cumulative_bytes_locked, 0, 0};
    for (const auto& arena : arenas) {
        const Arena::Stats arena_stats = arena.stats();
        totals.used += arena_stats.used;
        totals.free += arena_stats.free;
        totals.total += arena_stats.total;
        totals.chunks_used += arena_stats.chunks_used;
        totals.chunks_free += arena_stats.chunks_free;
    }
    return totals;
}
|
|
|
|
|
|
|
|
/** Append a new arena of `size` bytes to the pool. Returns false when the
 *  allocation fails, or when locking fails and the locking-failed callback
 *  vetoes proceeding. Caller must hold `mutex`.
 */
bool LockedPool::new_arena(size_t size, size_t align)
{
    bool locked;
    // If this is the first arena, handle this specially: Cap the upper size
    // by the process limit. This makes sure that the first arena will at least
    // be locked. An exception to this is if the process limit is 0:
    // in this case no memory can be locked at all so we'll skip past this logic.
    if (arenas.empty()) {
        size_t limit = allocator->GetLimit();
        if (limit > 0) {
            size = std::min(size, limit);
        }
    }
    // `locked` is written by AllocateLocked only on success; it is read below
    // only after the null check, so it is never used uninitialized.
    void *addr = allocator->AllocateLocked(size, &locked);
    if (!addr) {
        return false;
    }
    if (locked) {
        cumulative_bytes_locked += size;
    } else if (lf_cb) { // Call the locking-failed callback if locking failed
        if (!lf_cb()) { // If the callback returns false, free the memory and fail, otherwise consider the user warned and proceed.
            allocator->FreeLocked(addr, size);
            return false;
        }
    }
    arenas.emplace_back(allocator.get(), addr, size, align);
    return true;
}
|
|
|
|
|
|
|
|
// An Arena that remembers the (non-owning) allocator and region it came from
// so the destructor can hand the pages back. allocator_in must outlive this
// object -- the pool owns it via unique_ptr and passes allocator.get().
LockedPool::LockedPageArena::LockedPageArena(LockedPageAllocator *allocator_in, void *base_in, size_t size_in, size_t align_in):
    Arena(base_in, size_in, align_in), base(base_in), size(size_in), allocator(allocator_in)
{
}
|
|
|
|
// Return the backing region (cleansed, unlocked, released) to the allocator.
LockedPool::LockedPageArena::~LockedPageArena()
{
    allocator->FreeLocked(base, size);
}
|
|
|
|
|
|
|
|
/*******************************************************************************/
|
|
|
|
// Implementation: LockedPoolManager
|
|
|
|
//
|
2017-03-03 15:48:18 +01:00
|
|
|
// Singleton wrapper around LockedPool that installs LockingFailed as the
// locking-failed callback.
LockedPoolManager::LockedPoolManager(std::unique_ptr<LockedPageAllocator> allocator_in):
    LockedPool(std::move(allocator_in), &LockedPoolManager::LockingFailed)
{
}
|
|
|
|
|
|
|
|
// Callback invoked by LockedPool::new_arena when mlock/VirtualLock fails.
// Returning true means "consider the user warned and proceed anyway".
bool LockedPoolManager::LockingFailed()
{
    // TODO: log something but how? without including util.h
    return true;
}
|
|
|
|
|
|
|
|
// Create the process-wide singleton. Presumably invoked exactly once through
// std::call_once on LockedPoolManager::init_flag (defined above) -- confirm
// against the Instance() accessor in the header.
void LockedPoolManager::CreateInstance()
{
    // Using a local static instance guarantees that the object is initialized
    // when it's first needed and also deinitialized after all objects that use
    // it are done with it. I can think of one unlikely scenario where we may
    // have a static deinitialization order/problem, but the check in
    // LockedPoolManagerBase's destructor helps us detect if that ever happens.
#ifdef WIN32
    std::unique_ptr<LockedPageAllocator> allocator{std::make_unique<Win32LockedPageAllocator>()};
#else
    std::unique_ptr<LockedPageAllocator> allocator{std::make_unique<PosixLockedPageAllocator>()};
#endif
    static LockedPoolManager instance(std::move(allocator));
    LockedPoolManager::_instance = &instance;
}
|