Author

Topic: Are transaction IDs on blockchain.info written backwards? (Read 3594 times)

legendary
Activity: 2128
Merit: 1074
This thread just got referenced in another piece of misinformation regarding the internals of Bitcoin. I used to think that the big-endian vs. little-endian is something that confuses only undergraduates. But apparently many more people continue to get confused by the strange byte ordering in the Bitcoin code: it is neither big-endian nor little-endian. It was most likely defined accidentally to use the internal representation of the OpenSSL library that used hand-written assembly for speed on the 32-bit Intel architecture.

I used to recommend MacOSX as bi-endian platform that is easiest to work with. But Snow Leopard is the version that officially supports bi-endianness and the hardware to run it is getting hard to come by.

I'm going to post a short demonstration program that is probably a quickest way to convince the wondering programmer that Bitcoin protocol is neither big-endian nor little-endian.

To compile use the following command on Mac OSX 10.[56]:
Code:
gcc -arch i386 -arch ppc mojibake.c -o mojibake
Source:
Code:
#include

union mojibake {
unsigned char b[32],
b1[32][1],b2[16][2],b4[8][4],
b8[4][8],b16[2][16],b32[1][32];
unsigned short h[16];
unsigned int w[8];
unsigned long long d[4];
} x;

#if defined(__LITTLE_ENDIAN__) || defined(_MSC_VER)
#define E(N,j) (N-1-(j))
#elif defined(__BIG_ENDIAN__)
#define E(N,j) (j)
#else
#error not tested under this compiler
#endif
#define NL putchar('\n')
#define P(N) \
for (i = 0; i < 32/N; ++i) { \
printf("%*c",N,' '); \
for (j = 0; j < N; ++j) \
printf("%02x",x.b##N[i][E(N,j)]); \
} \
NL,NL

int main(int ac,char **av)
{
int i,j;

if (ac < 2)
ugh: return printf("usage: %s \n"
"where: is l or b or m\n"
"  for   l)ittle endian\n"
"   or   b)ig endian\n"
"   or   m)ojibake endian\n",av[0]);
switch (av[1][0]) {
case 'l': case 'L':
for (i = 0; i < sizeof x; ++i)
x.b[i] = i;
break;
case 'b': case 'B':
for (i = 0; i < sizeof x; ++i)
x.b[(sizeof x - 1) - i] = i;
break;
case 'm': case 'M':
for (i = 0; i < sizeof x; ++i)
x.b4[7-i/4][i%4] = i;
break;
default:
goto ugh;
}
for (i = 0; i < sizeof x; ++i)
printf("%1c%02x",' ',x.b[i]);
NL,NL;
for (i = 0; i < sizeof x/sizeof(short); ++i)
printf("%2c%04x",' ',x.h[i]);
NL,NL;
for (i = 0; i < sizeof x/sizeof(int); ++i)
printf("%4c%08x",' ',x.w[i]);
NL,NL;
for (i = 0; i < sizeof x/sizeof(long long); ++i)
printf("%8c%016llx",' ',x.d[i]);
NL,NL;
/*
P(2);
P(4);
*/
P(8);
P(16);
P(32);
return 0;
}
Output in the little-endian mode:
Code:
$ arch -i386 ./mojibake l
 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f

  0100  0302  0504  0706  0908  0b0a  0d0c  0f0e  1110  1312  1514  1716  1918  1b1a  1d1c  1f1e

    03020100    07060504    0b0a0908    0f0e0d0c    13121110    17161514    1b1a1918    1f1e1d1c

        0706050403020100        0f0e0d0c0b0a0908        1716151413121110        1f1e1d1c1b1a1918

        0706050403020100        0f0e0d0c0b0a0908        1716151413121110        1f1e1d1c1b1a1918

                0f0e0d0c0b0a09080706050403020100                1f1e1d1c1b1a19181716151413121110

                                1f1e1d1c1b1a191817161514131211100f0e0d0c0b0a09080706050403020100

$ arch -i386 ./mojibake b
 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00

  1e1f  1c1d  1a1b  1819  1617  1415  1213  1011  0e0f  0c0d  0a0b  0809  0607  0405  0203  0001

    1c1d1e1f    18191a1b    14151617    10111213    0c0d0e0f    08090a0b    04050607    00010203

        18191a1b1c1d1e1f        1011121314151617        08090a0b0c0d0e0f        0001020304050607

        18191a1b1c1d1e1f        1011121314151617        08090a0b0c0d0e0f        0001020304050607

                101112131415161718191a1b1c1d1e1f                000102030405060708090a0b0c0d0e0f

                                000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f

$ arch -i386 ./mojibake m
 1c 1d 1e 1f 18 19 1a 1b 14 15 16 17 10 11 12 13 0c 0d 0e 0f 08 09 0a 0b 04 05 06 07 00 01 02 03

  1d1c  1f1e  1918  1b1a  1514  1716  1110  1312  0d0c  0f0e  0908  0b0a  0504  0706  0100  0302

    1f1e1d1c    1b1a1918    17161514    13121110    0f0e0d0c    0b0a0908    07060504    03020100

        1b1a19181f1e1d1c        1312111017161514        0b0a09080f0e0d0c        0302010007060504

        1b1a19181f1e1d1c        1312111017161514        0b0a09080f0e0d0c        0302010007060504

                13121110171615141b1a19181f1e1d1c                03020100070605040b0a09080f0e0d0c

                                03020100070605040b0a09080f0e0d0c13121110171615141b1a19181f1e1d1c

Output in the big-endian mode:
Code:
$ arch -ppc ./mojibake l
 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f

  0001  0203  0405  0607  0809  0a0b  0c0d  0e0f  1011  1213  1415  1617  1819  1a1b  1c1d  1e1f

    00010203    04050607    08090a0b    0c0d0e0f    10111213    14151617    18191a1b    1c1d1e1f

        0001020304050607        08090a0b0c0d0e0f        1011121314151617        18191a1b1c1d1e1f

        0001020304050607        08090a0b0c0d0e0f        1011121314151617        18191a1b1c1d1e1f

                000102030405060708090a0b0c0d0e0f                101112131415161718191a1b1c1d1e1f

                                000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f

$ arch -ppc ./mojibake b
 1f 1e 1d 1c 1b 1a 19 18 17 16 15 14 13 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00

  1f1e  1d1c  1b1a  1918  1716  1514  1312  1110  0f0e  0d0c  0b0a  0908  0706  0504  0302  0100

    1f1e1d1c    1b1a1918    17161514    13121110    0f0e0d0c    0b0a0908    07060504    03020100

        1f1e1d1c1b1a1918        1716151413121110        0f0e0d0c0b0a0908        0706050403020100

        1f1e1d1c1b1a1918        1716151413121110        0f0e0d0c0b0a0908        0706050403020100

                1f1e1d1c1b1a19181716151413121110                0f0e0d0c0b0a09080706050403020100

                                1f1e1d1c1b1a191817161514131211100f0e0d0c0b0a09080706050403020100

$ arch -ppc ./mojibake m
 1c 1d 1e 1f 18 19 1a 1b 14 15 16 17 10 11 12 13 0c 0d 0e 0f 08 09 0a 0b 04 05 06 07 00 01 02 03

  1c1d  1e1f  1819  1a1b  1415  1617  1011  1213  0c0d  0e0f  0809  0a0b  0405  0607  0001  0203

    1c1d1e1f    18191a1b    14151617    10111213    0c0d0e0f    08090a0b    04050607    00010203

        1c1d1e1f18191a1b        1415161710111213        0c0d0e0f08090a0b        0405060700010203

        1c1d1e1f18191a1b        1415161710111213        0c0d0e0f08090a0b        0405060700010203

                1c1d1e1f18191a1b1415161710111213                0c0d0e0f08090a0b0405060700010203

                                1c1d1e1f18191a1b14151617101112130c0d0e0f08090a0b0405060700010203

It is probably easiest to compare the last two code inserts either side-by-side or using a visual diff utility.
jr. member
Activity: 42
Merit: 11
It will have different first letter (and lenght), so no more confusion then current way.
hero member
Activity: 812
Merit: 1022
No Maps for These Territories
no. it would just annoy people when converting back and forth.
And would make people confuse them with addresses.
legendary
Activity: 1050
Merit: 1000
You are WRONG!
Thanks for the conclusive answer. Do you feel that base58-encoded txid (and possibly txout too) are better alternatives?
no. it would just annoy people when converting back and forth.
member
Activity: 84
Merit: 10
There is no integer at the output of hash function. There are 32 bytes. And bitcoin stores/transmits it as it gets it from hash function, with the same order.
However, it prints it interpreting it as little-endian integer.
I.e. hash functions returns [1,2,3]. It's stored like this, used in internal structures etc.
Bitcoin (I suppose) prints it like this: "030201". Which I find a strange.
Ah, I think I finally figured out what you mean now. The client interprets the bytes from the hash function as an integer stored in little-endian order (not as a "little-endian integer", which has no meaning) in various places internally and in the protocol but prints out that integer in big-endian order. It should have treated the hash as an integer in big-endian order and then stored that integer in little-endian order to keep the protocol self-consistent, but that's water under the bridge now. So yes, that is strange.

I wonder if it does the same thing with block hashes too.
jr. member
Activity: 42
Merit: 11
Thanks for the conclusive answer. Do you feel that base58-encoded txid (and possibly txout too) are better alternatives?
legendary
Activity: 1050
Merit: 1000
You are WRONG!
yes, the satoshi client prints it out backwards. and its the de facto standard in all blockchain handling software.
jr. member
Activity: 42
Merit: 11
There is no integer at the output of hash function. There are 32 bytes. And bitcoin stores/transmits it as it gets it from hash function, with the same order.
However, it prints it interpreting it as little-endian integer.
I.e. hash functions returns [1,2,3]. It's stored like this, used in internal structures etc.
Bitcoin (I suppose) prints it like this: "030201". Which I find a strange.
member
Activity: 84
Merit: 10
It's storing it as bit string (like in standard). It seems when printing it, it interprets it as little-endian integer, so it comes out byte-reversed.
It's counter-intuitive, so I'm proposing use of base58 encoding for display purposes.

An integer is neither little-endian nor big-endian. An integer must be stored/sent/printed in one of the orders. So the big integer is stored in little-endian byte order in the Bitcoin protocol, but it is printed in big-endian order because that's how Westerners write numbers.

Think of the integer 123. It is not little-endian nor big-endian. I wrote it out in big-endian (the '1' digit is written first, which is on the left-hand side when writing left-to-right), but the integer itself is not big-endian. If I wrote it out in little-endian order, contrary to the Western convention, it would be 321. It's still the same number (one hundred and twenty three), but it's only written out in a different order. The same thing is happening with integers (including the transaction id) in the Bitcoin protocol. Each byte is a single "digit" in that case.
jr. member
Activity: 42
Merit: 11
It's storing it as bit string (like in standard). It seems when printing it, it interprets it as little-endian integer, so it comes out byte-reversed.
It's counter-intuitive, so I'm proposing use of base58 encoding for display purposes.
member
Activity: 84
Merit: 10
There is nothing little-endian in SHA-256. First byte is first byte, and should be printed as such (like in hex dump I provided, for example).
One approach to get rid of such inconsistency would be use of base58 encoding (which explicitly treats values as big-endian), with new version/application byte. It will be shorter, too.
If we consider the SHA-256 hash to be an integer, it can be stored in either byte order. The Bitcoin protocol stores it in Little Endian.

The integer in your example is 200e...21bd. In Little Endian byte order it is the byte sequence bd 21 ... 0e 20. This is exactly like storing/sending a smaller integer like 12345678 as 78 56 34 12 in Little Endian. The only difference is the number of bits.
jr. member
Activity: 42
Merit: 11
For human-readable form, I mean. Now they are printed as byte-reversed hex values.
full member
Activity: 154
Merit: 100
There is nothing little-endian in SHA-256. First byte is first byte, and should be printed as such (like in hex dump I provided, for example).
One approach to get rid of such inconsistency would be use of base58 encoding (which explicitly treats values as big-endian), with new version/application byte. It will be shorter, too.
It wouldn't be shorter at all. With base 58, you have 58 possible values per byte, with a byte string you get 256 values per byte. It would only look shorter when printed.
jr. member
Activity: 42
Merit: 11
There is nothing little-endian in SHA-256. First byte is first byte, and should be printed as such (like in hex dump I provided, for example).
One approach to get rid of such inconsistency would be use of base58 encoding (which explicitly treats values as big-endian), with new version/application byte. It will be shorter, too.
member
Activity: 84
Merit: 10
You're right, r.willis. The SHA spec does not explicitly point out that a SHA-256 has is a 256-bit integer.

However, as a programmer I tend to "read between the lines" and simplify specs to manage their complexity and try to understand them better. In the case of SHA, the spec mentions that all words are stored and represented in the Big-Endian order, so I came to the logical conclusion that SHA-256 outputs a 256-bit integer, with H0 being the most-significant 32-bit word and H7 the least-significant (H0 through H7 are appended from left to right).

It also simplifies understanding how the Bitcoin protocol treats the SHA-256 hash bit string--as an integer stored in Little Endian. This is consistent with the rest of the protocol as most every other integer is stored in the Little-Endian byte order (IP addresses and TCP port numbers being notable exceptions).

Dealing with the hash as an array of 32 char becomes straightforward: hash[0] is the least-significant digit (base-256 digit because a char is 8 bits wide) and hash[31] is the most-significant digit.

To print out the hash, it's a simple matter of printing out hash[31] through hash[0], as the Western convention is to write numbers in Big-Endian order.
jr. member
Activity: 42
Merit: 11
http://tools.ietf.org/html/rfc6234
The hash is supposed to be a 256bit integer.
Please provide exact citation. It talks about 8-, 32-, and 64-bit integers, but I see nothing about hash being 256bin integer.
SHA-256 (the hash function used to compute Bitcoin transaction IDs) treats the hash value as an integer.
Please provide credible citation.

member
Activity: 84
Merit: 10
Code:
A cryptographic hash function is a hash function; that is, an algorithm that takes an arbitrary block of data and returns a fixed-size [b]bit string[/b]
It's not integer, it's bit (byte) string.
Calculation of hash is defined with bit string operations, i.e. shifts and xors.
SHA-256 (the hash function used to compute Bitcoin transaction IDs) treats the hash value as an integer.

Keep in mind that an integer is also a bit string in a binary computer, so Wikipedia's definition of a cryptographic hash function is accurate but incomplete when discussing a specific hash function like SHA-256.
hero member
Activity: 675
Merit: 514
If you want to read the actual definition of the sha256 algorithm look here:
http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
or here:
http://tools.ietf.org/html/rfc6234
The hash is supposed to be a 256bit integer.
full member
Activity: 154
Merit: 100
Code:
A cryptographic hash function is a hash function; that is, an algorithm that takes an arbitrary block of data and returns a fixed-size [b]bit string[/b]
It's not integer, it's bit (byte) string.
Calculation of hash is defined with bit string operations, i.e. shifts and xors.
Internally, the reference client represents all hashes as 256 bit integers (or 160 bit integers for RIPEMD hashes). It makes no sense to me either as the only arithmetic operation that needs to be performed is to compare the hash to the difficulty target when verifying a block, and this can be done with lexicographic ordering which is the default when comparing strings.
jr. member
Activity: 42
Merit: 11
Code:
A cryptographic hash function is a hash function; that is, an algorithm that takes an arbitrary block of data and returns a fixed-size [b]bit string[/b]
It's not integer, it's bit (byte) string.
Calculation of hash is defined with bit string operations, i.e. shifts and xors.
member
Activity: 84
Merit: 10
A cryptographic hash is a big integer. What C++ integer type would you use to store a 256-bit integer besides an array of a smaller integer type (char in this case)?
jr. member
Activity: 42
Merit: 11
No, it is not.
Code:
32 	hash 	char[32] 	The hash of the referenced transaction. 
It makes no sense to treat (and print) it as integer.
But it seems like this strange custom (reversing represenation of tx ids) goes deep into the history of bitcoin. Someone (Satoshi?) implemented it that way, and everyone just follows.
member
Activity: 84
Merit: 10
A transaction id is a very large integer. Or you could say that an integer is also an array of bytes.
jr. member
Activity: 42
Merit: 11
Endianness have meaning when we talk about integers. tx ids are not integers, but array of bytes (chars).
member
Activity: 84
Merit: 10
It's in Little Endian byte order (least-significant byte first) in the protocol, but it's written out in Big Endian byte order (most-significant byte first) as most other numbers in English normally are.
full member
Activity: 141
Merit: 100
Seems I was mistaken. I was looking at the txid and didn't realize the byte order was already changed.
jr. member
Activity: 42
Merit: 11
No, I did not mean block hash. Transaction hash (sha256(sha256(TX))), as used
in inv message and to identify transaction inputs.
Okay. Take this example TX message:
Code:
00000000  f9 be b4 d9 74 78 00 00  00 00 00 00 00 00 00 00  |....tx..........|
00000010  02 01 00 00 90 43 5a 1c  01 00 00 00 01 bd 21 ae  |.....CZ.......!.|
00000020  63 83 d4 8c 04 47 14 cb  6a 2f 48 83 4d ce fb 75  |c....G..j/H.M..u|
00000030  53 90 ad 7e 76 5e 48 fc  24 bf 59 0e 20 00 00 00  |S..~v^H.$.Y. ...|
00000040  00 8b 48 30 45 02 20 69  93 20 be 23 ff 4c eb 21  |..H0E. i. .#.L.!|
00000050  79 e7 b7 d1 ca 57 d7 1b  5e c5 20 91 45 63 ec b3  |y....W..^. .Ec..|
00000060  53 00 ee 1d a0 6a b7 02  21 00 f7 e4 39 fe 2f aa  |S....j..!...9./.|
00000070  83 c7 cd 72 cd 1f b1 79  8f 5a e6 eb cb 73 2e 1d  |...r...y.Z...s..|
00000080  81 c7 f6 30 75 9e 15 62  72 6b 01 41 04 e7 d5 08  |...0u..brk.A....|
00000090  09 71 d8 bb f0 d1 8f 25  35 ea 09 5f 63 16 6e a2  |.q.....%5.._c.n.|
000000a0  0f 75 75 71 91 64 1a ad  a4 3d 0c 26 c1 50 b7 f8  |.uuq.d...=.&.P..|
000000b0  ad de 67 3c ae e1 5c 59  f6 1f 9c 4a 31 11 e0 61  |..g<..\Y...J1..a|
000000c0  da fb cf 00 52 d2 9c 35  21 f1 52 dd f0 ff ff ff  |....R..5!.R.....|
000000d0  ff 02 40 4b 4c 00 00 00  00 00 19 76 a9 14 06 f1  |[email protected]....|
000000e0  b6 70 79 1f 92 56 bf fc  89 8f 47 42 71 c2 2f 4b  |.py..V....GBq./K|
000000f0  b9 49 88 ac c2 fd 18 00  00 00 00 00 19 76 a9 14  |.I...........v..|
00000100  16 db fb 2a d4 f8 2c be  d2 0b 3a 32 44 94 00 f4  |...*..,...:2D...|
00000110  be 97 6d ef 88 ac 00 00  00 00                    |..m.......|
There is tx hash used as means to identify tx input:
bd21ae6383d48c044714cb6a2f48834dcefb755390ad7e765e48fc24bf590e20
If you search it on blockchain.info, you will find nothing. If you search it byte-reversed (200e59bf24fc485e767ead905375fbce4d83482f6acb1447048cd48363ae21bd),
you will find transaction in question. It seems unmistakeably reversed for me.   
full member
Activity: 141
Merit: 100
TX ids aren't written backwards. Did you mean the block hash? The block hash goes through an endianess change.

http://en.wikipedia.org/wiki/Endianness
jr. member
Activity: 42
Merit: 11
No one willing to check it? Shocked
jr. member
Activity: 42
Merit: 11
Are transaction IDs (32-byte hashes of TX body) on blockchain.info written backwards, i.e. with bytes reversed?
Or it's mistake on my end, and I have it backwards myself?
Jump to: