mirror of
https://github.com/dashpay/dash.git
synced 2024-12-25 20:12:57 +01:00
4aa197dbdb
fa4632c41714dfaa699bacc6a947d72668a4deef test: Move boost/stdlib includes last (MarcoFalke) fa488f131fd4f5bab0d01376c5a5013306f1abcd scripted-diff: Bump copyright headers (MarcoFalke) fac5c373006a9e4bcbb56843bb85f1aca4d87599 scripted-diff: Sort test includes (MarcoFalke) Pull request description: When writing tests, often includes need to be added or removed. Currently the list of includes is not sorted, so developers that write tests and have `clang-format` installed will either have an unrelated change (sorting) included in their commit or they will have to manually undo the sort. This pull preempts both issues by just sorting all includes in one commit. Please be aware that this is **NOT** a change to policy to enforce clang-format or any other developer guideline or process. Developers are free to use whatever tool they want, see also #18651. Edit: Also includes a commit to bump the copyright headers, so that the touched files don't need to be touched again for that. ACKs for top commit: practicalswift: ACK fa4632c41714dfaa699bacc6a947d72668a4deef jonatack: ACK fa4632c41714dfaa, light review and sanity checks with gcc build and clang fuzz build Tree-SHA512: 130a8d073a379ba556b1e64104d37c46b671425c0aef0ed725fd60156a95e8dc83fb6f0b5330b2f8152cf5daaf3983b4aca5e75812598f2626c39fd12b88b180
343 lines
12 KiB
Python
Executable File
343 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# linearize-data.py: Construct a linear, no-fork version of the chain.
|
|
#
|
|
# Copyright (c) 2013-2020 The Bitcoin Core developers
|
|
# Distributed under the MIT software license, see the accompanying
|
|
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
#
|
|
|
|
import struct
|
|
import re
|
|
import os
|
|
import os.path
|
|
import sys
|
|
import dash_hash
|
|
import datetime
|
|
import time
|
|
import glob
|
|
from collections import namedtuple
|
|
from binascii import unhexlify
|
|
|
|
# Module-level configuration dictionary. The script entry point at the bottom
# of this file fills it with the key=value pairs read from the config file.
settings = {}
|
|
|
|
def hex_switchEndian(s):
    """Reverse the order of the two-character groups of hex string *s*.

    The characters inside each pair keep their order, so "aabbcc" becomes
    "ccbbaa". A trailing single character (odd-length input) is treated as
    its own group.
    """
    # Walk the pair start offsets from the end of the string to the front.
    starts = reversed(range(0, len(s), 2))
    return b''.join(s[start:start + 2].encode() for start in starts).decode()
|
|
|
|
def uint32(x):
    """Return the low 32 bits of integer *x* as a non-negative int."""
    low_32_bits = 0xffffffff
    return x & low_32_bits
|
|
|
|
def bytereverse(x):
    """Return *x* with its four bytes in reverse order, truncated to 32 bits.

    The result is a true byte swap when *x* already fits in 32 bits.
    """
    moved_to_top = x << 24                     # byte 0 -> byte 3
    moved_up = (x << 8) & 0x00ff0000           # byte 1 -> byte 2
    moved_down = (x >> 8) & 0x0000ff00         # byte 2 -> byte 1
    moved_to_bottom = x >> 24                  # byte 3 -> byte 0
    return uint32(moved_to_top | moved_up | moved_down | moved_to_bottom)
|
|
|
|
def bufreverse(in_buf):
    """Byte-swap every 4-byte word of *in_buf* and return the new buffer.

    The words stay in their original order; only the bytes inside each word
    are reversed. struct raises struct.error if the final slice is shorter
    than four bytes.
    """
    swapped_words = [
        struct.pack('@I', bytereverse(struct.unpack('@I', in_buf[start:start + 4])[0]))
        for start in range(0, len(in_buf), 4)
    ]
    return b''.join(swapped_words)
|
|
|
|
def wordreverse(in_buf):
    """Return *in_buf* with its 4-byte words in reverse order.

    Bytes inside each word are left untouched. A trailing fragment shorter
    than four bytes is treated as the last word and therefore ends up first.
    """
    word_starts = reversed(range(0, len(in_buf), 4))
    return b''.join(in_buf[start:start + 4] for start in word_starts)
|
|
|
|
def calc_hdr_hash(blk_hdr):
    """Return the proof-of-work hash of *blk_hdr* as computed by dash_hash.getPoWHash."""
    pow_hash = dash_hash.getPoWHash(blk_hdr)
    return pow_hash
|
|
|
|
def calc_hash_str(blk_hdr):
    """Return the hash of *blk_hdr* as a hex string.

    The raw hash is byte-swapped within each 4-byte word and then has its
    word order reversed before being hex-encoded.
    """
    raw_digest = calc_hdr_hash(blk_hdr)
    reordered = wordreverse(bufreverse(raw_digest))
    return reordered.hex()
|
|
|
|
def get_blk_dt(blk_hdr):
    """Extract the timestamp stored in a block header.

    Reads the little-endian 32-bit integer at bytes 68..71 of *blk_hdr* and
    returns ``(month_start, nTime)`` where ``month_start`` is a naive
    datetime for the first day of the month containing the timestamp
    (converted with the host's local time zone) and ``nTime`` is the raw
    integer.
    """
    (nTime,) = struct.unpack("<I", blk_hdr[68:72])
    stamp = datetime.datetime.fromtimestamp(nTime)
    month_start = datetime.datetime(stamp.year, stamp.month, 1)
    return (month_start, nTime)
|
|
|
|
# When getting the list of block hashes, undo any byte reversals.
|
|
def get_block_hashes(settings):
    """Read the ordered list of block hashes from the hash-list file.

    Args:
        settings: configuration dict. ``settings['hashlist']`` is the path of
            a UTF-8 text file with one hex block hash per line, and
            ``settings['rev_hash_bytes']`` is the string ``'true'`` when the
            hashes in that file are byte-reversed and must be switched back.

    Returns:
        A list of hash strings; the list index of each hash is its height.
    """
    blkindex = []
    # The context manager closes the file even if a line fails to convert.
    with open(settings['hashlist'], "r", encoding="utf8") as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # A blank (e.g. trailing) line is not a block hash; counting
                # it would shift heights and inflate the expected block total.
                continue
            if settings['rev_hash_bytes'] == 'true':
                line = hex_switchEndian(line)
            blkindex.append(line)

    print("Read " + str(len(blkindex)) + " hashes")

    return blkindex
|
|
|
|
# The block map shouldn't give or receive byte-reversed hashes.
|
|
def mkblockmap(blkindex):
    """Build a dict mapping each hash in *blkindex* to its position (height).

    If a hash appears more than once, the last position wins.
    """
    return {blk_hash: height for height, blk_hash in enumerate(blkindex)}
|
|
|
|
# This gets the first block file ID that exists from the input block
|
|
# file directory.
|
|
def getFirstBlockFileId(block_dir_path):
    """Return the numeric ID of the lowest-numbered blkNNNNN.dat file.

    Looks in *block_dir_path* for files named 'blk' + five digits + '.dat'.
    Returns 0 (after printing a notice) when no such file exists; otherwise
    returns the five-digit number of the lexicographically smallest match,
    which need not be 0 on a pruned node.
    """
    pattern = os.path.join(block_dir_path, "blk[0-9][0-9][0-9][0-9][0-9].dat")
    candidates = glob.glob(pattern)

    if not candidates:
        print("blocks not pruned - starting at 0")
        return 0

    # All names have the same length and zero-padded digits, so the
    # lexicographic minimum is also the numeric minimum.
    earliest_name = os.path.basename(min(candidates))

    # 'blkNNNNN.dat': the ID digits occupy string positions 3 through 7.
    return int(earliest_name[3:8])
|
|
|
|
# Block header and extent on disk
|
|
# Fields:
#   fn     - number of the input block file
#   offset - file position of the block data that follows the block header
#   inhdr  - the 8-byte record header (network magic + length)
#   blkhdr - the 80-byte block header
#   size   - length of the block data, excluding the block header
_BLOCK_EXTENT_FIELDS = ('fn', 'offset', 'inhdr', 'blkhdr', 'size')
BlockExtent = namedtuple('BlockExtent', _BLOCK_EXTENT_FIELDS)
|
|
|
|
class BlockDataCopier:
    """Copy blocks from blkNNNNN.dat input files to the output in height order.

    The input files are scanned sequentially. A block whose hash is at the
    next expected height is written immediately; any other known block is
    remembered (and cached in memory while the cache has room) until its
    turn comes. Blocks whose hash is not in the block map are skipped.
    """

    def __init__(self, settings, blkindex, blkmap):
        """Set up the copier.

        Args:
            settings: configuration dict. Keys read here or later:
                'input', 'max_out_sz', 'file_timestamp', 'split_timestamp',
                'netmagic', 'out_of_order_cache_sz', 'debug_output', and
                either 'output' (directory) or 'output_file' (single file).
            blkindex: list of block hashes, indexed by height.
            blkmap: dict mapping block hash to height.
        """
        self.settings = settings
        self.blkindex = blkindex
        self.blkmap = blkmap

        # Get first occurring block file id - for pruned nodes this
        # will not necessarily be 0
        self.inFn = getFirstBlockFileId(self.settings['input'])
        self.inF = None
        self.outFn = 0
        self.outsz = 0
        self.outF = None
        self.outFname = None
        self.blkCountIn = 0
        self.blkCountOut = 0
        # Hash of the most recently scanned block; set by run() and used in
        # the "New month" message of writeBlock().
        self.hash_str = ""

        self.lastDate = datetime.datetime(2000, 1, 1)
        # Highest block timestamp written so far, used as the mtime of
        # finished output files. NOTE(review): the seed value is inherited
        # as-is; 315360000 is ten 365-day years in seconds.
        self.highTS = 1408893517 - 315360000
        self.timestampSplit = False
        self.fileOutput = True
        self.setFileTime = False
        self.maxOutSz = settings['max_out_sz']
        if 'output' in settings:
            # An output directory was given: write numbered blkNNNNN.dat files.
            self.fileOutput = False
        if settings['file_timestamp'] != 0:
            self.setFileTime = True
        if settings['split_timestamp'] != 0:
            self.timestampSplit = True
        # Extents and cache for out-of-order blocks
        self.blockExtents = {}
        self.outOfOrderData = {}
        self.outOfOrderSize = 0  # running total size for items in outOfOrderData

    def _closeOutput(self):
        """Close the current output file, if any, and stamp its mtime."""
        if not self.outF:
            return
        self.outF.close()
        if self.setFileTime:
            os.utime(self.outFname, (int(time.time()), self.highTS))
        self.outF = None
        self.outFname = None

    def _rotateOutput(self):
        """Finish the current output file and advance to the next file number."""
        self._closeOutput()
        self.outFn = self.outFn + 1
        self.outsz = 0

    def _closeFiles(self):
        """Release the output and input file handles held by the copier."""
        self._closeOutput()
        if self.inF:
            self.inF.close()
            self.inF = None

    def writeBlock(self, inhdr, blk_hdr, rawblock):
        """Append one block (record header, block header, data) to the output.

        Starts a new output file first when, in directory mode, the block
        would push the current file past the size limit, or when
        month-splitting is enabled and the block belongs to a later month
        than any block written before.
        """
        blockSizeOnDisk = len(inhdr) + len(blk_hdr) + len(rawblock)
        # Only rotate when a file is actually open; a first block larger than
        # the limit must not try to close a file that does not exist yet.
        if (not self.fileOutput and self.outF
                and (self.outsz + blockSizeOnDisk) > self.maxOutSz):
            self._rotateOutput()

        (blkDate, blkTS) = get_blk_dt(blk_hdr)
        if self.timestampSplit and (blkDate > self.lastDate):
            print("New month " + blkDate.strftime("%Y-%m") + " @ " + self.hash_str)
            self.lastDate = blkDate
            if self.outF:
                self._rotateOutput()

        if not self.outF:
            if self.fileOutput:
                # NOTE(review): in single-file mode a month split reopens the
                # same path with "wb", which truncates it - confirm intent.
                self.outFname = self.settings['output_file']
            else:
                self.outFname = os.path.join(self.settings['output'], "blk%05d.dat" % self.outFn)
            print("Output file " + self.outFname)
            self.outF = open(self.outFname, "wb")

        self.outF.write(inhdr)
        self.outF.write(blk_hdr)
        self.outF.write(rawblock)
        self.outsz = self.outsz + blockSizeOnDisk

        self.blkCountOut = self.blkCountOut + 1
        if blkTS > self.highTS:
            self.highTS = blkTS

        if (self.blkCountOut % 1000) == 0:
            print('%i blocks scanned, %i blocks written (of %i, %.1f%% complete)' %
                    (self.blkCountIn, self.blkCountOut, len(self.blkindex), 100.0 * self.blkCountOut / len(self.blkindex)))

    def inFileName(self, fn):
        """Return the path of input block file number *fn*."""
        return os.path.join(self.settings['input'], "blk%05d.dat" % fn)

    def fetchBlock(self, extent):
        '''Fetch block contents from disk given extents'''
        with open(self.inFileName(extent.fn), "rb") as f:
            f.seek(extent.offset)
            return f.read(extent.size)

    def copyOneBlock(self):
        '''Find the next block to be written in the input, and copy it to the output.'''
        extent = self.blockExtents.pop(self.blkCountOut)
        if self.blkCountOut in self.outOfOrderData:
            # If the data is cached, use it from memory and remove from the cache
            rawblock = self.outOfOrderData.pop(self.blkCountOut)
            self.outOfOrderSize -= len(rawblock)
        else:  # Otherwise look up data on disk
            rawblock = self.fetchBlock(extent)

        self.writeBlock(extent.inhdr, extent.blkhdr, rawblock)

    def run(self):
        """Scan the input files and write every listed block in height order.

        Stops when all blocks of the index are written, or prints
        "Premature end of block data" and returns when the next input file
        cannot be opened. Open file handles are always released on exit.
        """
        try:
            while self.blkCountOut < len(self.blkindex):
                if not self.inF:
                    fname = self.inFileName(self.inFn)
                    print("Input file " + fname)
                    try:
                        self.inF = open(fname, "rb")
                    except IOError:
                        print("Premature end of block data")
                        return

                inhdr = self.inF.read(8)
                if len(inhdr) < 8:
                    # End of file, or a trailing fragment too short to hold a
                    # record header. Without this check a short read followed
                    # by the 7-byte rewind below lands on the same position
                    # again and never terminates. (Indexing bytes yields an
                    # int in Python 3, so the old comparison of inhdr[0] with
                    # the str "\0" could never match; zero padding is skipped
                    # by the magic search below.)
                    self.inF.close()
                    self.inF = None
                    self.inFn = self.inFn + 1
                    continue

                inMagic = inhdr[:4]
                if (inMagic != self.settings['netmagic']):
                    # Seek backwards 7 bytes (skipping the first byte in the previous search)
                    # and continue searching from the new position if the magic bytes are not
                    # found.
                    self.inF.seek(-7, os.SEEK_CUR)
                    continue
                inLenLE = inhdr[4:]
                su = struct.unpack("<I", inLenLE)
                inLen = su[0] - 80  # length without header
                blk_hdr = self.inF.read(80)
                inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen)

                self.hash_str = calc_hash_str(blk_hdr)
                # Use the instance's own map and settings rather than module
                # globals so the class also works when imported.
                if self.hash_str not in self.blkmap:
                    # Because blocks can be written to files out-of-order as of 0.10, the script
                    # may encounter blocks it doesn't know about. Treat as debug output.
                    if self.settings.get('debug_output') == 'true':
                        print("Skipping unknown block " + self.hash_str)
                    self.inF.seek(inLen, os.SEEK_CUR)
                    continue

                blkHeight = self.blkmap[self.hash_str]
                self.blkCountIn += 1

                if self.blkCountOut == blkHeight:
                    # If in-order block, just copy
                    rawblock = self.inF.read(inLen)
                    self.writeBlock(inhdr, blk_hdr, rawblock)

                    # See if we can catch up to prior out-of-order blocks
                    while self.blkCountOut in self.blockExtents:
                        self.copyOneBlock()

                else:  # If out-of-order, skip over block data for now
                    self.blockExtents[blkHeight] = inExtent
                    if self.outOfOrderSize < self.settings['out_of_order_cache_sz']:
                        # If there is space in the cache, read the data
                        # Reading the data in file sequence instead of seeking and fetching it later is preferred,
                        # but we don't want to fill up memory
                        self.outOfOrderData[blkHeight] = self.inF.read(inLen)
                        self.outOfOrderSize += inLen
                    else:  # If no space in cache, seek forward
                        self.inF.seek(inLen, os.SEEK_CUR)

            print("Done (%i blocks written)" % (self.blkCountOut))
        finally:
            # Flush and close the last output file (applying its timestamp
            # like every earlier file) and release the input handle.
            self._closeFiles()
|
|
|
|
# Script entry point: read the config file named on the command line, fill in
# defaults, normalise value types, then linearize the block data.
# `settings`, `blkindex` and `blkmap` are deliberately kept as module-level
# names so other code in this file can refer to them.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: linearize-data.py CONFIG-FILE")
        sys.exit(1)

    # The context manager closes the config file even if parsing fails.
    with open(sys.argv[1], encoding="utf8") as f:
        for line in f:
            # skip comment lines
            if re.search(r'^\s*#', line):
                continue

            # parse key=value lines
            m = re.search(r'^(\w+)\s*=\s*(\S.*)$', line)
            if m is None:
                continue
            # The pattern captures up to end of line, so drop trailing
            # whitespace that would otherwise corrupt paths and hex values.
            settings[m.group(1)] = m.group(2).rstrip()

    # Force hash byte format setting to be lowercase to make comparisons easier.
    # Also place upfront in case any settings need to know about it.
    settings.setdefault('rev_hash_bytes', 'false')
    settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower()

    # Values used for any key the config file did not provide.
    default_settings = {
        'netmagic': 'bf0c6bbd',
        'genesis': '00000ffd590b1485b3caadc19b22e6379c733355108f107a430458cdf3407ab6',
        'input': 'input',
        'hashlist': 'hashlist.txt',
        'file_timestamp': 0,
        'split_timestamp': 0,
        'max_out_sz': 1000 * 1000 * 1000,
        'out_of_order_cache_sz': 100 * 1000 * 1000,
        'debug_output': 'false',
    }
    for key, value in default_settings.items():
        settings.setdefault(key, value)

    # Config values arrive as strings; convert them to their working types.
    settings['max_out_sz'] = int(settings['max_out_sz'])
    settings['split_timestamp'] = int(settings['split_timestamp'])
    settings['file_timestamp'] = int(settings['file_timestamp'])
    settings['netmagic'] = unhexlify(settings['netmagic'].encode('utf-8'))
    settings['out_of_order_cache_sz'] = int(settings['out_of_order_cache_sz'])
    settings['debug_output'] = settings['debug_output'].lower()

    if 'output_file' not in settings and 'output' not in settings:
        print("Missing output file / directory")
        sys.exit(1)

    blkindex = get_block_hashes(settings)
    blkmap = mkblockmap(blkindex)

    # Block hash map won't be byte-reversed. Neither should the genesis hash.
    if settings['genesis'] not in blkmap:
        print("Genesis block not found in hashlist")
    else:
        BlockDataCopier(settings, blkindex, blkmap).run()
|