From cd3bedaf1c45cb372877644ca2c67ef59fbe4058 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Tue, 9 Feb 2021 11:59:19 +0100 Subject: [PATCH] Merge #20557: addrman: Fix new table bucketing during unserialization 4676a4fb5be0f6ef0b3f71c1f4361c20f7cb0e0b [addrman] Don't repeat "Bucketing method was updated" log multiple times (John Newbery) 436292367c1d737cf73bd985293539500d1206f5 [addrman] Improve serialization comments (John Newbery) ac3547eddd8a7d67b4103508f30d5d02a9c1f148 [addrman] Improve variable naming/code style of touched code. (John Newbery) a5c9b04959f443372400f9a736c6eaf5502284a1 [addrman] Don't rebucket new table entries unnecessarily (John Newbery) 8062d928ce5c495c1b6ecd18e4b30c12da822d90 [addrman] Rename asmap version to asmap checksum (John Newbery) 009b8e0fdf3bfb11668edacced5d8b70726d5d0e [addrman] Improve variable naming/code style of touched code. (John Newbery) b4c5fda417dd9ff8bf9fe24a87d384a649e3730d [addrman] Fix new table bucketing during unserialization (John Newbery) Pull request description: This fixes three issues in addrman unserialization. 1. An addrman entry can appear in up to 8 new table buckets. We store this entry->bucket indexing during shutdown so that on restart we can restore the entries to their correct buckets. Commit ec45646de9e62b3d42c85716bfeb06d8f2b507dc broke the deserialization code so that each entry could only be put in up to one new bucket. 2. Unserialization may result in an entry appearing in a 9th bucket. If the entry already appears in 8 buckets don't try to place it in another bucket. 3. We unnecessarily rebucket when reading a peers.dat with file version 1. Don't do that. ACKs for top commit: vasild: ACK 4676a4fb5be0f6ef0b3f71c1f4361c20f7cb0e0b glozow: re-ACK https://github.com/bitcoin/bitcoin/commit/4676a4fb5be0f6ef0b3f71c1f4361c20f7cb0e0b, changes were a rename, comments, and removing repeat-logging. naumenkogs: ACK 4676a4f laanwj: Code review ACK 4676a4fb5be0f6ef0b3f71c1f4361c20f7cb0e0b dhruv: ACK 4676a4fb5be0f6ef0b3f71c1f4361c20f7cb0e0b ryanofsky: Code review ACK 4676a4fb5be0f6ef0b3f71c1f4361c20f7cb0e0b. I'm not previously familiar with this code but all the changes here do make sense and seem like improvements. Left some notes and comments, but they aren't important so feel to ignore. Tree-SHA512: b228984f6dec5910be23c3740ae20258da33bcf66ceb7edb10e5a53163450f743bab349e47f09808b7e8d40f27143119ec3e0981d7e678aa494d8559a1c99c23 --- src/addrman.h | 92 +++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 39 deletions(-) diff --git a/src/addrman.h b/src/addrman.h index 26c03c4c7a..a42ec4769d 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -332,22 +332,20 @@ public: * * nNew * * nTried * * number of "new" buckets XOR 2**30 - * * all nNew addrinfos in vvNew - * * all nTried addrinfos in vvTried - * * for each bucket: + * * all new addresses (total count: nNew) + * * all tried addresses (total count: nTried) + * * for each new bucket: * * number of elements - * * for each element: index + * * for each element: index in the serialized "all new addresses" + * * asmap checksum * * 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it * as incompatible. This is necessary because it did not check the version number on * deserialization. * - * Notice that vvTried, mapAddr and vVector are never encoded explicitly; + * vvNew, vvTried, mapInfo, mapAddr and vRandom are never encoded explicitly; * they are instead reconstructed from the other information. * - * vvNew is serialized, but only used if ADDRMAN_UNKNOWN_BUCKET_COUNT didn't change, - * otherwise it is reconstructed as well. - * * This format is more complex, but significantly smaller (at most 1.5 MiB), and supports * changes to the ADDRMAN_ parameters without breaking the on-disk structure. * @@ -405,13 +403,13 @@ public: } } } - // Store asmap version after bucket entries so that it + // Store asmap checksum after bucket entries so that it // can be ignored by older clients for backward compatibility. - uint256 asmap_version; + uint256 asmap_checksum; if (m_asmap.size() != 0) { - asmap_version = SerializeHash(m_asmap); + asmap_checksum = SerializeHash(m_asmap); } - s << asmap_version; + s << asmap_checksum; } template @@ -494,47 +492,63 @@ public: nTried -= nLost; // Store positions in the new table buckets to apply later (if possible). - std::map entryToBucket; // Represents which entry belonged to which bucket when serializing + // An entry may appear in up to ADDRMAN_NEW_BUCKETS_PER_ADDRESS buckets, + // so we store all bucket-entry_index pairs to iterate through later. + std::vector> bucket_entries; - for (int bucket = 0; bucket < nUBuckets; bucket++) { - int nSize = 0; - s >> nSize; - for (int n = 0; n < nSize; n++) { - int nIndex = 0; - s >> nIndex; - if (nIndex >= 0 && nIndex < nNew) { - entryToBucket[nIndex] = bucket; + for (int bucket = 0; bucket < nUBuckets; ++bucket) { + int num_entries{0}; + s >> num_entries; + for (int n = 0; n < num_entries; ++n) { + int entry_index{0}; + s >> entry_index; + if (entry_index >= 0 && entry_index < nNew) { + bucket_entries.emplace_back(bucket, entry_index); } } } - uint256 supplied_asmap_version; + // If the bucket count and asmap checksum haven't changed, then attempt + // to restore the entries to the buckets/positions they were in before + // serialization. + uint256 supplied_asmap_checksum; if (m_asmap.size() != 0) { - supplied_asmap_version = SerializeHash(m_asmap); + supplied_asmap_checksum = SerializeHash(m_asmap); } - uint256 serialized_asmap_version; + uint256 serialized_asmap_checksum; if (format >= Format::V2_ASMAP) { - s >> serialized_asmap_version; + s >> serialized_asmap_checksum; + } + const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && + serialized_asmap_checksum == supplied_asmap_checksum}; + + if (!restore_bucketing) { + LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n"); } - for (int n = 0; n < nNew; n++) { - CAddrInfo &info = mapInfo[n]; - int bucket = entryToBucket[n]; - int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); - if (format >= Format::V2_ASMAP && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && - info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) { + for (auto bucket_entry : bucket_entries) { + int bucket{bucket_entry.first}; + const int entry_index{bucket_entry.second}; + CAddrInfo& info = mapInfo[entry_index]; + + // The entry shouldn't appear in more than + // ADDRMAN_NEW_BUCKETS_PER_ADDRESS. If it has already, just skip + // this bucket_entry. + if (info.nRefCount >= ADDRMAN_NEW_BUCKETS_PER_ADDRESS) continue; + + int bucket_position = info.GetBucketPosition(nKey, true, bucket); + if (restore_bucketing && vvNew[bucket][bucket_position] == -1) { // Bucketing has not changed, using existing bucket positions for the new table - vvNew[bucket][nUBucketPos] = n; - info.nRefCount++; + vvNew[bucket][bucket_position] = entry_index; + ++info.nRefCount; } else { - // In case the new table data cannot be used (format unknown, bucket count wrong or new asmap), + // In case the new table data cannot be used (bucket count wrong or new asmap), // try to give them a reference based on their primary source address. - LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n"); bucket = info.GetNewBucket(nKey, m_asmap); - nUBucketPos = info.GetBucketPosition(nKey, true, bucket); - if (vvNew[bucket][nUBucketPos] == -1) { - vvNew[bucket][nUBucketPos] = n; - info.nRefCount++; + bucket_position = info.GetBucketPosition(nKey, true, bucket); + if (vvNew[bucket][bucket_position] == -1) { + vvNew[bucket][bucket_position] = entry_index; + ++info.nRefCount; } } }