"nobody has done studies of the existing block chain to see how much space could be reclaimed." --https://en.bitcoin.it/wiki/Scalability
So I gave it a go. I hope someone will check my logic, especially as I am new to Python.
$ time src/bitcointools/freeable.py
loaded 125925 blocks
201085 of 544296 tx freeable (corrected: 387616 of 544296 tx freeable)
63328760 of 168627001 tx bytes freeable (corrected: 123285900 of 168627001 tx bytes freeable)
real 0m47.131s
user 0m43.799s
sys 0m2.744s
$ ls -l .bitcoin/blk0*.dat
-rw------- 1 jtobey jtobey 180119823 2011-05-23 00:42 .bitcoin/blk0001.dat
(Edited to fix a bug: http://forum.bitcoin.org/index.php?topic=9461.msg137059#msg137059)
Diff against bitcointools:
diff --git a/deserialize.py b/deserialize.py
index fe0cb09..a67645b 100644
--- a/deserialize.py
+++ b/deserialize.py
@@ -75,6 +75,7 @@ def deserialize_TxOut(d, owner_keys=None):
def parse_Transaction(vds):
d = {}
+ start = vds.read_cursor # XXX breaks BCDataStream abstraction, do we care?
d['version'] = vds.read_int32()
n_vin = vds.read_compact_size()
d['txIn'] = []
@@ -85,6 +86,7 @@ def parse_Transaction(vds):
for i in xrange(n_vout):
d['txOut'].append(parse_TxOut(vds))
d['lockTime'] = vds.read_uint32()
+ d['tx'] = vds.input[start:vds.read_cursor]
return d
def deserialize_Transaction(d, transaction_index=None, owner_keys=None):
result = "%d tx in, %d out\n"%(len(d['txIn']), len(d['txOut']))
diff --git a/freeable.py b/freeable.py
new file mode 100755
index 0000000..c79f458
--- /dev/null
+++ b/freeable.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# Read the block database, find out how many transactions
+# could be purged and how many bytes they take up.
+# TODO: find out how many Merkle hashes could be purged.
+#
+from bsddb.db import *
+import logging
+import os
+import sys
+
+from BCDataStream import *
+from block import scan_blocks
+from deserialize import parse_Block
+from util import determine_db_dir, create_env
+import Crypto.Hash.SHA256 as SHA256
+
+import binascii # debugging
+
+def main():
+ import optparse
+ parser = optparse.OptionParser(usage="%prog [options]")
+ parser.add_option("--datadir", dest="datadir", default=None,
+ help="Look for files here (defaults to bitcoin default)")
+ (options, args) = parser.parse_args()
+
+ if options.datadir is None:
+ db_dir = determine_db_dir()
+ else:
+ db_dir = options.datadir
+
+ try:
+ db_env = create_env(db_dir)
+ except DBNoSuchFileError:
+ logging.error("Couldn't open " + db_dir)
+ sys.exit(1)
+
+ blockfile = open(os.path.join(db_dir, "blk%04d.dat"%(1,)), "rb")
+ block_datastream = BCDataStream()
+ block_datastream.map_file(blockfile, 0)
+
+ blocks = []
+ def gather_stats(block_data):
+ block_datastream.seek_file(block_data['nBlockPos'])
+ blocks.append(parse_Block(block_datastream))
+ return True
+
+ scan_blocks(db_dir, db_env, gather_stats)
+ blocks.reverse()
+ print 'loaded ', len(blocks), ' blocks'
+
+ tx = {}
+ total_tx = 0
+ freeable_tx = 0
+ total_bytes = 0
+ freeable_bytes = 0
+
+ for data in blocks:
+ coinbase = True
+ for txn in data['transactions']:
+ tx_hash = SHA256.new(SHA256.new(txn['tx']).digest()).digest()
+ #print '> ', binascii.hexlify(tx_hash)
+ tx_bytes = len(txn['tx'])
+ tx[tx_hash] = (tx_bytes, len(txn['txOut']))
+ total_tx += 1
+ total_bytes += tx_bytes
+
+ if coinbase:
+ coinbase = False
+ else:
+ for txin in txn['txIn']:
+ #print '< ', binascii.hexlify(txin['prevout_hash'])
+ (bytes, live) = tx[txin['prevout_hash']]
+ if live == 1:
+ freeable_bytes += bytes
+ freeable_tx += 1
+ del tx[txin['prevout_hash']]
+ else:
+ tx[txin['prevout_hash']] = (bytes, live - 1)
+
+ db_env.close()
+
+ print freeable_tx, 'of', total_tx, 'tx freeable'
+ print freeable_bytes, 'of', total_bytes, 'tx bytes freeable'
+
+if __name__ == '__main__':  # run the scan only when executed as a script
+ main()