From 573d3a0e0219758535d68ee18ef2809e50874590 Mon Sep 17 00:00:00 2001 From: fanquake Date: Wed, 5 Feb 2020 08:29:57 +0800 Subject: [PATCH] Merge #17336: scripts: search for first block file for linearize-data with some block files pruned 317fb96de9c6257972f1213b4ef2c3fe87dde99f Add search for first blk file with pruned node (Rjected) Pull request description: When bitcoind is running in pruned mode, producing a hashlist with `./linearize-hashes.py linearize.cfg > hashlist.txt` and then executing `linearize-data.py linearize.cfg` will produce: ``` Read 313001 hashes Input file /home/dan/.bitcoin/blocks/blk00000.dat Premature end of block data ``` This happens because `linearize-data` starts by attempting to process `blk00000.dat` regardless of whether or not `blk00000.dat` actually exists - this may not be the case if working with a pruned node. This PR adds a function which finds the first block file that does exist, and calls that function when the `BlockDataCopier` is initialized. This is a refactor of #16431. ACKs for top commit: darosior: ACK 317fb96de9c6257972f1213b4ef2c3fe87dde99f laanwj: Code review ACK 317fb96de9c6257972f1213b4ef2c3fe87dde99f theStack: Code review ACK https://github.com/bitcoin/bitcoin/pull/17336/commits/317fb96de9c6257972f1213b4ef2c3fe87dde99f Tree-SHA512: fc8014282df6cfe7b267e64db8ce7d82b86b758c302fbfea4a3c39b62d93512f5c2e31a0de4e9c5ec18fc0268c917f011257d37b45afaef6033eec90e4aa585f --- contrib/linearize/linearize-data.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index b94d2885d4..108058ac83 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -16,6 +16,7 @@ import sys import dash_hash import datetime import time +import glob from collections import namedtuple from binascii import hexlify, unhexlify @@ -95,6 +96,30 @@ def mkblockmap(blkindex): blkmap[hash] = height return blkmap +# This gets the first block file ID that exists from the input block +# file directory. +def getFirstBlockFileId(block_dir_path): + # First, this sets up a pattern to search for block files, for + # example 'blkNNNNN.dat'. + blkFilePattern = os.path.join(block_dir_path, "blk[0-9][0-9][0-9][0-9][0-9].dat") + + # This search is done with glob + blkFnList = glob.glob(blkFilePattern) + + if len(blkFnList) == 0: + print("blocks not pruned - starting at 0") + return 0 + # We then get the lexicographic minimum, which should be the first + # block file name. + firstBlkFilePath = min(blkFnList) + firstBlkFn = os.path.basename(firstBlkFilePath) + + # now, the string should be ['b','l','k','N','N','N','N','N','.','d','a','t'] + # So get the ID by choosing: 3 4 5 6 7 + # The ID is not necessarily 0 if this is a pruned node. + blkId = int(firstBlkFn[3:8]) + return blkId + # Block header and extent on disk BlockExtent = namedtuple('BlockExtent', ['fn', 'offset', 'inhdr', 'blkhdr', 'size']) @@ -104,7 +129,9 @@ class BlockDataCopier: self.blkindex = blkindex self.blkmap = blkmap - self.inFn = 0 + # Get first occurring block file id - for pruned nodes this + # will not necessarily be 0 + self.inFn = getFirstBlockFileId(self.settings['input']) self.inF = None self.outFn = 0 self.outsz = 0