But now it is taking me 1.5 hours to get all inputs and outputs from each block through bitcoind's JSON-RPC. At the current pace, it will take months to get all of the data I need. I desperately need your help.
I come from academia (Economics, specifically), and I often find it difficult to convincingly describe to my peers the growth of the Bitcoin ecosystem in numbers that translate to economic value. If Bitcoin is a peer-to-peer electronic value system, we need a better indicator of BTC volume between network peers. As of today, Blockchain.info is the only company that provides that estimation, but after testing multiple random samples I have found evidence that that their methodology
significantly underestimates volume.For the past couple of months I have been working on a better methodology for estimating on-chain volume, but I am very frustrated with how long it is taking to get transaction inputs and outputs via bitcoind's JSON-RPC interface.
After months of testing random samples and manually looking at addresses, I believe I have developed a more accurate methodology to estimate on-chain volume.
I plan to share this methodology with the community once I can test my hypothesis using recent blocks, but the challenge now is to obtain all inputs and outputs in a timely manner.Below is the list of calls that I have been using to get that data. I would be extremely grateful if one of you could tell me if I am doing something wrong or if there is a better way of getting inputs and outputs from every transaction. Anything helps, it is pretty frustrating having to deal with this after months working on the hypothesis.
Ultimately, I believe a more realistic volume estimation backed by a peer reviewed methodology would increase interest from academia and add credibility to the Bitcoin network. Please help.
from bitcoinrpc.authproxy import AuthServiceProxy, JSONRPCException
import json
import requests
import pprint
import time
import os
objectivestart_time=time.time()
pp= pprint.PrettyPrinter(indent=4)
rpc_user='n'
rpc_password='n'
url=("http://%s:%s@1"%(rpc_user, rpc_password))
def req(payload):
return requests.post(url,data=json.dumps(payload)).json()
def BlockHash_JsonRPCpayload(number):
dic={
"jsonrpc":"2.0",
"method": "getblockhash",
"params": [number] ,
"id": 1
}
return (dic)
def Block_JsonRPCpayload(h):
dic={
"jsonrpc":"2.0",
"method": "getblock",
"params": [str(h)] ,
"id": 1
}
return (dic)
def rawtx_JsonRPCpayload(tx):
dic={
"jsonrpc":"2.0",
"method": "getrawtransaction",
"params": [tx, True] ,
"id": 1
}
return (dic)
def rawvin_JsonRPCpayload(tx,vin):
block_payload={
"jsonrpc":"2.0",
"method": "getrawtransaction",
"params": [tx,1] ,
"id": 1
}
request=req(block_payload)
value=request['result']['vout'][int(vin)]['value']
return float(value)
BTC_dic={}
hash_payload=[]
start_time=time.time()
block_start=481101
block_end=490000
start=block_start
end= block_start
while end < block_end:
block_tries=0
while True:
try:
end=start+100
hash_payload=[]
print ( 'start', 'end', start, end )
for i in range(start,end):
hash_payload.append(BlockHash_JsonRPCpayload(i))
start_time=time.time()
print ( 'mark 1 ')
hash_response=requests.post(url, data=json.dumps(hash_payload)).json()
print('time to get hash batch',start, "_", end, time.time()-start_time)
print ( 'success in getting hashes for ranges', start, "_ ", end )
time.sleep(0.4)
block_payload=[]
for i in hash_response:
h=i['result']
block_payload.append(Block_JsonRPCpayload(h))
start_time=time.time()
block_response=requests.post(url,data=json.dumps(block_payload)).json()
print ( 'time to get block batch ',start, "_", end, time.time()-start_time)
print ( 'success in getting blocks for ranges', start, "_ ", end )
time.sleep(0.4)
for i in range(len(block_response)):
block=block_response[i]['result']
height=block['height']
print('getting', len(block['tx']),'transactions for block ', height)
BTC_dic[height]=block
tx_payload=[]
for tx in block['tx']:
tx_payload.append(rawtx_JsonRPCpayload(tx))
try_errors=0
while True:
try:
#print ( len(tx_payload), 'number of transactions in block ', height)
start_time=time.time()
transaction=requests.post(url,data=json.dumps(tx_payload)).json()
time.sleep(0.5)
#print ('time to get tx', time.time()-start_time)
print ( 'success in getting transactions for block ', height )
except:
try_errors+=1
print ( '!! Try error !!')
print ( try_errors, ' transaction tries for block height', height)
time.sleep(3)
continue
break
for i in range(len(transaction)):
tx_result=transaction[i]['result']
vin=tx_result['vin']
vout=tx_result['vout']
try:
fees=0
for j in range(len(vout)):
fees -= tx_result['vout'][j]['value']
for j in range(len(vin)):
vout=vin[j]['vout']
txid=vin[j]['txid']
vin_error=0
while True:
try:
value=rawvin_JsonRPCpayload(txid,vout)
except:
print ( '!! vin error !!')
print ( 'attempt', vin_error )
vin_error+=1
time.sleep(0.4)
continue
break
fees+=value
tx_result['vin'][j]['value']=value
tx_result['fees']=fees
except:
tx_result['vin'][0]['value']='No Inputs (Newly Generated Coins)'
tx_result['fees']=0
BTC_dic[height]['tx'][i]=tx_result
if len(BTC_dic) >= 100:
print ( 'len BTC_dic =', len(BTC_dic))
for block in BTC_dic:
block_fee=0
transactions= BTC_dic[block]['tx']
#pp.pprint(transactions)
for j in transactions:
fee=j['fees']
block_fee+=fee
BTC_dic[block]['block fees']=block_fee
block_array=[]
for block in BTC_dic:
block_array.append(block)
file_name='jcampbell_btc_txdata'+str(min(block_array))+'_'+str(max(block_array))+'.json'
print ('saving', file_name, 'time', time.time()-objectivestart_time)
with open(file_name, 'w') as outfile:
json.dump(BTC_dic,outfile)
BTC_dic={}
start=end
except:
block_tries+=1
print(block_tries, 'tries to get data for block range ', start,"_",end)
time.sleep(5)
continue
break
for block in BTC_dic:
block_fee=0
transactions= BTC_dic[block]['tx']
#pp.pprint(transactions)
for j in transactions:
fee=j['fees']
block_fee+=fee
BTC_dic[block]['block fees']=block_fee
block_array=[]
for block in BTC_dic:
block_array.append(block)
file_name='jcampbell_btc_txdata'+str(min(block_array))+'_'+str(max(block_array))+'.json'
print ('saving', file_name, 'time', time.time()-objectivestart_time)
with open(file_name, 'w') as outfile:
json.dump(BTC_dic,outfile)