Look at sub read_var_int in my quick n dirty parser:
#!/usr/bin/perl
use warnings;
use strict;
use List::MoreUtils qw/natatime/;
use Data::Dumper;
my $MAGIC = 3652501241;
my $BLOCKS_DIR = '/home/btc/.bitcoin/blocks';
## xxd-like output
sub print_dump {
my ($data) = @_;
my $offset = 0;
open my $fd, '<', \$data or die "open: $!";
while (1) {
my $read = read $fd, my $bytes, 16;
last unless $read;
my @bytes = split '', $bytes;
my $rawchars = $bytes; $rawchars =~ s/[^ -~]/./g;
my @hexpairs;
my $iter = natatime 2, @bytes;
while (my @vals = $iter->()) {
push @hexpairs, join '', map { sprintf '%02x', ord } @vals;
}
printf "%08x: %-39s %s\n", $offset, (join ' ', @hexpairs), $rawchars;
$offset += $read;
}
close $fd;
}
sub read_var_int {
my ($fd, $rem) = @_;
my $val;
my $read = read $fd, $val, 1; $$rem -= 1;
if (!$read) { return undef; }
$val = unpack 'C', $val;
if ($val == 0xfd) {
$read = read $fd, $val, 2; $$rem -= 2;
$val = unpack 'S', $val;
} elsif ($val == 0xfe) {
$read = read $fd, $val, 4; $$rem -= 4;
$val = unpack 'L', $val;
} elsif ($val == 0xff) {
$read = read $fd, $val, 8; $$rem -= 8;
$val = unpack 'Q', $val;
}
return $val;
}
sub read_inputs {
my ($fd, $count, $rem) = @_;
my $read; ## should be used to check ret vals from read/sysread
my $inputs;
for my $input (1 .. $count) {
my ($prev_txid, $prev_idx, $sig_script, $seq);
$read = read $fd, $prev_txid, 32; $$rem -= 32;
$prev_txid = unpack 'H64', $prev_txid;
#print " prev_txid ($prev_txid)\n";
$read = read $fd, $prev_idx, 4; $$rem -= 4;
$prev_idx = unpack 'L', $prev_idx;
#print " prev_idx ($prev_idx)\n";
my $sig_script_len = read_var_int $fd, $rem;
if (!defined $sig_script_len) { print 'sig_script_len undef'; die; }
$read = read $fd, $sig_script, $sig_script_len; $$rem -= $sig_script_len;
#printf " sig_script: %d bytes\n", length $sig_script;
$read = read $fd, $seq, 4; $$rem -= 4;
$seq = unpack 'L', $seq;
#print " seq ($seq)\n";
push @$inputs, {
prev_txid => $prev_txid,
prev_idx => $prev_idx,
sig_script => $sig_script,
seq => $seq,
}
}
return $inputs;
}
sub read_outputs {
my ($fd, $count, $rem) = @_;
my $read; ## should be used to check ret vals from read/sysread
my $outputs;
for my $output (1 .. $count) {
my ($val, $pubkey_script_len, $pubkey_script);
$read = read $fd, $val, 8; $$rem -= 8;
$val = unpack 'Q', $val;
$pubkey_script_len = read_var_int $fd, $rem;
if (!defined $pubkey_script_len) { print 'pubkey_script_len undef'; die; }
$read = read $fd, $pubkey_script, $pubkey_script_len; $$rem -= $pubkey_script_len;
#printf " pubkey_script: %d bytes\n", length $pubkey_script;
push @$outputs, {
val => $val,
pubkey_script => $pubkey_script,
};
}
return $outputs;
}
sub parse_txs {
my ($txn_data, $len) = @_;
my $read; ## should be used to check ret vals from read/sysread
my $txs;
my $remaining = $len;
open my $txn_fd, '<', \$txn_data or die "open: $!";
my $txn_count = read_var_int $txn_fd, \$remaining;
if (!defined $txn_count) { print 'txn_count undef'; die; }
#print " txn_count ($txn_count)\n";
#print_dump $txn_data;
my ($tx_ver, $input_count, $inputs, $output_count, $outputs, $lock_time);
for my $tx_idx (1 .. $txn_count) {
$read = read $txn_fd, $tx_ver, 4; $remaining -= 4;
$tx_ver = unpack 'L', $tx_ver;
#print " tx_ver ($tx_ver)\n";
$input_count = read_var_int $txn_fd, \$remaining;
if (!defined $input_count) { print 'input_count undef'; die; }
#print " input_count ($input_count)\n";
$inputs = read_inputs $txn_fd, $input_count, \$remaining;
#print Data::Dumper->Dump ([$inputs],['inputs']);
$output_count = read_var_int $txn_fd, \$remaining;
if (!defined $output_count) { print 'output_count undef'; die; }
#print " output_count ($output_count)\n";
$outputs = read_outputs $txn_fd, $output_count, \$remaining;
#print Data::Dumper->Dump ([$outputs],['outputs']);
$read = read $txn_fd, $lock_time, 4; $remaining -= 4;
$lock_time = unpack 'L', $lock_time;
#print " lock_time ($lock_time)\n";
push @$txs, {
version => $tx_ver,
inputs => $inputs,
outputs => $outputs,
lock_time => $lock_time,
};
}
return $txs;
}
sub parse_block {
my ($height, $block_data, $len) = @_;
my $read; ## should be used to check ret vals from read/sysread
my $remaining = $len;
open my $block_fd, '<', \$block_data or die "open: $!";
my $block_header;
$read = read $block_fd, $block_header, 80; $remaining -= 80;
my ($ver, $prev_block, $mrkl, $ts, $bits, $nonce) = unpack 'L H64 H64 L H8 L', $block_header;
$prev_block = reverse $prev_block=~/../g;
$mrkl = reverse $mrkl=~/../g;
my $txn_data;
$read = read $block_fd, $txn_data, $remaining;
close $block_fd;
#my $tx = parse_txs $txn_data, $remaining; ## commented out for faster parsing
return {
version => $ver,
height => $height,
prev_block => $prev_block,
merkle_tree => $mrkl,
timestamp => $ts,
bits => $bits,
nonce => $nonce,
#tx => $tx,
};
}
my $blk_file_num = -1;
my $fd;
sub open_next_blk_file {
close $fd if defined $fd;
$blk_file_num++;
my $blkfile = sprintf "$BLOCKS_DIR/blk%05d.dat", $blk_file_num;
sysopen $fd, $blkfile, 0 or die "sysopen: $!";
binmode $fd;
}
#################################################################################################
open_next_blk_file;
my $height = 0;
my %prev_blocks_seen;
while (1) {
my $read; ## should be used to check ret vals from read/sysread
my $data;
my ($magic, $len, $remaining);
$read = sysread $fd, $data, 8;
if (!defined $read) { die "sysread: $!"; }
if (!$read) {
warn "sysread: null, going to next file";
open_next_blk_file;
redo;
}
if ($read < 8) {
warn "sysread: short read, going to next file";
open_next_blk_file;
redo;
}
($magic, $len) = unpack 'L L', $data;
$remaining = $len;
next unless $magic; ## magic == 0, probably near end of file
if ($MAGIC != $magic) { die "got magic ($magic) instead of ($MAGIC) at block $height\n"; }
## read whole block
$read = sysread $fd, $data, $len;
if ($len != $read) { $read or last; die "sysread: $!"; }
#print_dump $block_data;
my $block = parse_block $height, $data, $len;
## orphan detection, untested on orphan chains larger than one single block
if ($height and exists $prev_blocks_seen{ $block->{'prev_block'} }) {
my $to_downgrade = ($height-1) - $prev_blocks_seen{ $block->{'prev_block'} };
warn "orphan, height ($block->{'height'}) pb ($block->{'prev_block'}) ts ($block->{'timestamp'}) to_downgrade ($to_downgrade)\n";
$height -= ($height-1) - $prev_blocks_seen{ $block->{'prev_block'} };
next;
}
print "$height,$len\n";
# if (128352 == $block->{'height'}) {
# use bignum;
# my $bits = join '', reverse $block->{'bits'} =~ /../g;
# my ($b1, $b2) = map { hex $_ } $bits =~ /^(..)(.*)$/;
# my $diff = (0xffff << 208) / ($b2 * 2 ** (8 * ($b1-3)));
# $block->{'difficulty'} = "$diff";
#
# print Data::Dumper->Dump ([$block],['block']);
# }
$prev_blocks_seen{ $block->{'prev_block'} } = $height;
$height++;
}
close $fd;
Thank you!