#!/usr/bin/env python3
# Copyright (c) 2013-2020 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Generate seeds.txt from the output of "protx list valid 1",
# then create onion_seeds.txt and add some active onion services to it;
# see tor.md for candidates.
#
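# Example invocation (file names are illustrative, not fixed):
#   dash-cli protx list valid 1 > protx_list.json
#   python3 makeseeds.py protx_list.json onion_seeds.txt > seeds.txt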

import re
import sys
import dns.resolver
import collections
import json
import multiprocessing

NSEEDS = 512

MAX_SEEDS_PER_ASN = 4
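# NSEEDS caps the total number of seeds emitted; MAX_SEEDS_PER_ASN caps how
# many of those seeds may come from any single autonomous system (see
# filterbyasn below).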

# These are hosts that have been observed to be behaving strangely (e.g.
# aggressively connecting to every node).
with open("suspicious_hosts.txt", mode="r", encoding="utf-8") as f:
    SUSPICIOUS_HOSTS = {s.strip() for s in f if s.strip()}
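# suspicious_hosts.txt holds one address per line; blank lines are ignored.
# (Illustrative entry: 130.211.129.106)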

PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
PATTERN_ONION = re.compile(r"^([a-z2-7]{56}\.onion):(\d+)$")

def parseip(ip_in):
    m = PATTERN_IPV4.match(ip_in)
    ip = None
    if m is None:
        m = PATTERN_IPV6.match(ip_in)
        if m is None:
            m = PATTERN_ONION.match(ip_in)
            if m is None:
                return None
            else:
                net = 'onion'
                ipstr = sortkey = m.group(1)
                port = int(m.group(2))
        else:
            net = 'ipv6'
            if m.group(1) in ['::']: # Not interested in localhost
                return None
            ipstr = m.group(1)
            sortkey = ipstr # XXX parse IPv6 into number, could use name_to_ipv6 from generate-seeds
            port = int(m.group(2))
    else:
        # Do IPv4 sanity check
        ip = 0
        for i in range(0, 4):
            if int(m.group(i+2)) < 0 or int(m.group(i+2)) > 255:
                return None
            ip = ip + (int(m.group(i+2)) << (8*(3-i)))
        if ip == 0:
            return None
        net = 'ipv4'
        sortkey = ip
        ipstr = m.group(1)
        port = int(m.group(6))

    return {
        "net": net,
        "ip": ipstr,
        "port": port,
        "ipnum": ip,
        "sortkey": sortkey
    }
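# For illustration, parseip("1.2.3.4:9999") returns
#   {'net': 'ipv4', 'ip': '1.2.3.4', 'port': 9999, 'ipnum': 16909060, 'sortkey': 16909060}
# while inputs that match no pattern, or 0.0.0.0 / [::], return None.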

def filtermulticollateralhash(mns):
    '''Filter out MNs sharing the same collateral hash'''
    hist = collections.defaultdict(list)
    for mn in mns:
        hist[mn['collateralHash']].append(mn)
    return [mn for mn in mns if len(hist[mn['collateralHash']]) == 1]

def filtermulticollateraladdress(mns):
    '''Filter out MNs sharing the same collateral address'''
    hist = collections.defaultdict(list)
    for mn in mns:
        hist[mn['collateralAddress']].append(mn)
    return [mn for mn in mns if len(hist[mn['collateralAddress']]) == 1]

def filtermultipayoutaddress(mns):
    '''Filter out MNs sharing the same payout address'''
    hist = collections.defaultdict(list)
    for mn in mns:
        hist[mn['state']['payoutAddress']].append(mn)
    return [mn for mn in mns if len(hist[mn['state']['payoutAddress']]) == 1]
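# Note: each filter drops every entry of a colliding group, not just the
# extras; e.g. if two MNs share one payout address, neither of them is kept.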

def resolveasn(resolver, ip):
    if ip['net'] == 'ipv4':
        ipaddr = ip['ip']
        prefix = '.origin'
    else: # http://www.team-cymru.com/IP-ASN-mapping.html
        res = str() # 2001:4860:b002:23::68
        for nb in ip['ip'].split(':')[:4]: # pick the first four 16-bit groups
            for c in nb.zfill(4): # each group left-padded with '0' to 4 nibbles
                res += c + '.' # 2001 4860 b002 0023
        ipaddr = res.rstrip('.') # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
        prefix = '.origin6'
    asn = int([x.to_text() for x in resolver.resolve('.'.join(reversed(ipaddr.split('.'))) + prefix + '.asn.cymru.com', 'TXT').response.answer][0].split('\"')[1].split(' ')[0])
    return asn
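# Illustrative Team Cymru lookups (TXT queries; the first field of the answer
# is the AS number):
#   1.2.3.4               -> 4.3.2.1.origin.asn.cymru.com
#   2001:4860:b002:23::68 -> 3.2.0.0.2.0.0.b.0.6.8.4.1.0.0.2.origin6.asn.cymru.com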

# Based on Greg Maxwell's seed_filter.py
def filterbyasn(ips, max_per_asn, max_total):
    # Sift out ips by type
    ips_ipv46 = [ip for ip in ips if ip['net'] in ['ipv4', 'ipv6']]
    ips_onion = [ip for ip in ips if ip['net'] == 'onion']

    my_resolver = dns.resolver.Resolver()

    pool = multiprocessing.Pool(processes=16)

    # OpenDNS servers
    my_resolver.nameservers = ['208.67.222.222', '208.67.220.220']

    # Resolve ASNs in parallel
    asns = [pool.apply_async(resolveasn, args=(my_resolver, ip)) for ip in ips_ipv46]

    # Filter IPv4/IPv6 by ASN
    result = []
    asn_count = {}
    for i, ip in enumerate(ips_ipv46):
        if len(result) == max_total:
            break
        try:
            asn = asns[i].get()
            if asn not in asn_count:
                asn_count[asn] = 0
            if asn_count[asn] == max_per_asn:
                continue
            asn_count[asn] += 1
            result.append(ip)
        except Exception as e:
            sys.stderr.write(f'ERR: Could not resolve ASN for {ip["ip"]}: {e}\n')

    # Add back onions (which bypass the ASN limit)
    result.extend(ips_onion)
    return result
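# e.g. with max_per_asn=4, at most four IPv4/IPv6 seeds are kept per AS, so
# no single hosting provider can dominate the seed list; onions are exempt.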

def main():
    # This expects JSON as output by "protx list valid 1"
    if len(sys.argv) > 1:
        with open(sys.argv[1], 'r', encoding="utf8") as f:
            mns = json.load(f)
    else:
        mns = json.load(sys.stdin)

    # Optional second argument: a file with one onion address per line
    onions = []
    if len(sys.argv) > 2:
        with open(sys.argv[2], 'r', encoding="utf8") as f:
            onions = f.read().split('\n')

    # Skip PoSe banned MNs
    mns = [mn for mn in mns if mn['state']['PoSeBanHeight'] == -1]
    # Skip MNs with < 10000 confirmations
    mns = [mn for mn in mns if mn['confirmations'] >= 10000]
    # Filter out MNs which are definitely from the same person/operator
    mns = filtermulticollateralhash(mns)
    mns = filtermulticollateraladdress(mns)
    mns = filtermultipayoutaddress(mns)
    # Extract IPs
    ips = [parseip(mn['state']['service']) for mn in mns]
    # Skip entries whose service string failed to parse
    ips = [ip for ip in ips if ip is not None]
    for onion in onions:
        parsed = parseip(onion)
        if parsed is not None:
            ips.append(parsed)
    # Look up ASNs and limit results, both per ASN and globally.
    ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
    # Sort the results by IP address (for deterministic output).
    ips.sort(key=lambda x: (x['net'], x['sortkey']), reverse=True)

    for ip in ips:
        if ip['net'] == 'ipv6':
            print('[%s]:%i' % (ip['ip'], ip['port']))
        else:
            print('%s:%i' % (ip['ip'], ip['port']))
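# Output is one "address:port" per line, e.g. (illustrative):
#   1.2.3.4:9999
#   [2001:db8::1]:9999
#   <56 base32 chars>.onion:9999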

if __name__ == '__main__':
    main()