Author

Topic: How fast/simple/short a CPU bitcoin mining core script can be? (Read 2590 times)

newbie
Activity: 9
Merit: 0
Hey! I like it. I compared your code to mine.. Maybe you can help me out with my sha256 function. It keeps giving me the wrong hash. Do you notice anything obvi wrong w/it ? (it's as3)

Code:
import flash.utils.ByteArray;

var K =
[0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2]

function ch(x,y,z)
{
return (x & y) | (~x & z);
}

function maj(x,y,z)
{
return (x & y) | (x & z) | (y & z);
}

function sigma0(x)
{
return rotr(2, x) | rotr(13, x) | rotr(22, x)
}

function sigma1(x)
{
return rotr(6, x) | rotr(11, x) | rotr(25,x)
}

function lsigma0(x)
{
return rotr(7, x) | rotr(18, x) | shr(3, x);
}

function lsigma1(x)
{
return rotr(17, x) | rotr(19, x) | shr(10, x);
}

// rotate right
function rotr(n, x)
{
return (x >> n) | (x << (32-n));
}

// shift right
function shr(n, x)
{
return x >> n;
}

function pad(str)
{
var msg = new ByteArray();
// msg.length = 512/8;
msg.position = 0;

// write the characters
for( var i=0; i < str.length; i++ ) {
msg.writeByte( str.charCodeAt(i) );
}

// pad it with k zeroes
// (lmod512) +1+ k ≡ 448
var str_length_bits = str.length * 8;
var num_zeroes = 448 - 1 - (str_length_bits % 512);

// write a 1
msg.writeByte( 0x80 );

// we can only write bytes at a time, so let's see how many bytes it is
var num_zeroes_bytes = Math.floor(num_zeroes / 8);

// need to wrap to another block?
if( num_zeroes_bytes < 0 ) {
num_zeroes_bytes += 64
}

trace('num_zeroes_bytes', num_zeroes_bytes);
for( var i=0; i < num_zeroes_bytes; i++ ) {
msg.writeByte(0);
}

// 64 bit block that is the length of the message (in # of bits).
// we ignore the first 32 bits because I'm not sure how to bitwise
// shift a flash number to two 32-bit slices
msg.writeUnsignedInt(0);

// meaning our message can not be longer than (2**33)-1 in length
msg.writeUnsignedInt(str_length_bits);

return msg;
}

// do eeet
var str = "jakes";

var msg = pad(str);

for( var n = 0; n < msg.length; n++ )
{
trace( n + ' : ' +  left_pad(msg[n].toString(2)) );
}

function left_pad(str, length=8, padstr='0')
{
while( str.length < length ) {
str = padstr + str;
}

return str;
}

// parse into n 512-bit (64 bytes, 16 words) blocks

// set initial hash value
var H = [0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19];

var modVal = Math.pow(2, 32);

var block:ByteArray = new ByteArray();

var offset = 0;
msg.position = 0;
while(true)
{
try
{
// read the block
msg.readBytes(block, offset, 64);
trace('reading block from ' + offset);
offset += 64;

// construct the message schedule (64 32-bit words)
var W = [];

block.position = 0;
for( var t = 0; t < 64; t++ )
{
if( t <= 15 )
{
W[t] = block.readByte();
}
else
{
W[t] = lsigma1(W[t-2]) + W[t-7] + lsigma0(W[t-15]) + W[t-16];
}
}

// 8 working variables
var a:uint = H[0];
var b:uint = H[1];
var c:uint = H[2];
var d:uint = H[3];
var e:uint = H[4];
var f:uint = H[5];
var g:uint = H[6];
var h:uint = H[7];

for( t = 0; t < 64; t++ )
{
var T1:uint = h + sigma1(e) + ch(e,f,g) + K[t] + W[t];
//T1 = T1%modVal;

var T2:uint = sigma0(a) + maj(a,b,c);
//T2 = T2%modVal;

h = g;
g = f;
f = e;
e = d + T1;
d = c;
c = b;
b = a;
a = (T1 + T2);
//a = a%modVal;
}

// compute the i-th intermediate hash value of H(i)
H[0] = (a + H[0]) % modVal;
H[1] = (b + H[1]) % modVal;
H[2] = (c + H[2]) % modVal;
H[3] = (d + H[3]) % modVal;
H[4] = (e + H[4]) % modVal;
H[5] = (f + H[5]) % modVal;
H[6] = (g + H[6]) % modVal;
H[7] = (h + H[7]) % modVal;
}
catch(e)
{
trace('no more blocks, ending at byte ',offset, 'of', msg.length);
// no more bytes to read?
break;
}
}

// sha256('jakes') =
// 1e03a95d8dc846c1c64271998a9865ed1a46d962634df2c9cff5b095dc8b5aca

var hex = '';
for( n = 0; n < 8; n++ )
{
hex += left_pad(H[n].toString(16), 8) + ' ';
}

trace( hex.length + ': ' + hex);
newbie
Activity: 26
Merit: 0
Anybody has anything to point out?
Of course its nothing serious, just for fun and constructive learning.
newbie
Activity: 26
Merit: 0
Randomly building a CPU miner in pure C.
I just wanted to make it as compact as humanly possible, and maybe fast... maybe...

It uses a bunch of optimizations like midstate precalc and early 2nd round loop termination.

Basically it is a heavily simplified version of this reference algorithm:
http://bradconte.com/sha256_c

Compiling it with
Code:
-g -O3 -march=native
in Windows and a Core 2 Duo 2.33 GHz brings 850 Kh/s (single thread).
I wanted to make use of some sort of SSE/SIMD though, but myself I don't think I'd do it right (any help is appreciated).

What do you peolpe think?  Tongue

Windows executable:
https://mediafire.com/?xb9en1ab3t8e3c2

Source code:

sha256_cpu.c
Code:
#include "sha256_cpu.h"

int main() {

    // Big-endian 255123 block string
    uchar text[]="02000000"
    "61b9273640571357bdc428788b36ae9827349e9d40627d2d2d00000000000000"
    "b1eb3bce1dde137625382e9445e707e6ec3f9b46948d2a7d8d88da42a877104d"
    "5f1c2152"
    "57524119"
    "79f90238"
    "800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000280"
    ;

    // Little-endian version:

    // 000000023627b961571357407828c4bd98ae368b9d9e34272d7d62400000002d00000000ce3bebb1
    // 7613de1d942e3825e607e745469b3fec7d2a8d9442da888d4d1077a852211c5f194152573802f979
    // 00000080000000000000000000000000000000000000000000000000000000000000000000000000
    // 0000000080020000

    /** STRING PRE-PROCESS **/

    uint i;
    uchar *pos = text;
    static uint text1[32];

    // convert chars to hex values. Evil dump into text1
    for(i = 0; i < 128; i++) {
        *((uchar*)text1+i) = htochar(pos);
        pos+=2;
    }

    // Switch endianness
    // You can remove the following loop if you are already
    // working with a little-endian string
    for(i = 0; i < 32; i++) {
        text1[i] = byte_swap4(text1[i]);
    }

    // Pre-process finished.
    // String is loaded into text1 as 32 binary u-integers.

    /** MIDSTATE CALCULATION **/

    uint midstate[8];
    sha256_MS(text1, midstate);

    static uint res;
    uint *ptroff = &text1[16];

    text1[19] = 0x79f90238U;
    printf("Starting nonce: %08x\n", text1[19]);

    SYSTEMTIME st; // Windows API to build a timer...
    GetSystemTime(&st);
    uint start = st.wSecond*1000 + st.wMilliseconds;

    /** MAIN LOOP **/

    const uint endnonce = 0x79f90238U + 1000000U; // 850 Kh/s: hash rate on my Core 2 Duo 2.333 GHz :)

    for (text1[19] = 0x79f90238U; text1[19] < endnonce; text1[19]++) { // Increment nonce

        res = sha256d(midstate, ptroff); // The kraken. Release it.

        if (res == 0) printf("Share found at nonce: %08x SUCCESS\n", text1[19]);
    }

    GetSystemTime(&st);
    printf("Ending nonce: %08x\n\n", --text1[19]);
    printf("Total time taken: %f secs\n", ((st.wSecond*1000.0 + st.wMilliseconds)-start)/1000.0);
    printf("Estimated hashrate (can be very inaccurate): %f Mh/s\n", 1000.0/((st.wSecond*1000.0 + st.wMilliseconds)-start));

    // Btw real hash is:
    // e17e38f81b4af47ab2ff29fe554c8c767c03444aee9119381f00000000000000
    // 000000000000001f381991ee4a44037c768c4c55fe29ffb27af44a1bf8387ee1

    printf("You can now safely terminate the program.\n");
    getchar();
    return 0;
}


sha256_cpu.h
Code:
#include
#include

#define uchar unsigned char
#define uint unsigned int

uchar htochar(uchar *ptr) {
    uchar value = 0;
    char ch = *ptr;

    if (ch >= '0' && ch <= '9')
        value = (value << 4) + (ch - '0');
    else
        value = (value << 4) + (ch - 'a' + 10);

    ch = *(++ptr);

    if (ch >= '0' && ch <= '9')
        value = (value << 4) + (ch - '0');
    else
        value = (value << 4) + (ch - 'a' + 10);

    return value;
}

#define byte_swap4(val)              \
         (((val & 0xff) << 24) |     \
          ((val & 0xff00) << 8) |    \
          ((val & 0xff0000) >> 8) |  \
          ((val & 0xff000000) >> 24))

#define ROTLEFT(a,b) ((a << b) | (a >> (32-b)))
#define ROTRIGHT(a,b) ((a >> b) | (a << (32-b)))

#define CH(x,y,z) ((x & y) ^ (~x & z))
#define MAJ(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ (x >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ (x >> 10))

static const uint k[64] = {
   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};


void sha256_MS(uint data[], uint midstate[]) {
    uint a,b,c,d,e,f,g,h,i=0,t1,t2,m[64];

    a = 0x6a09e667U;
    b = 0xbb67ae85U;
    c = 0x3c6ef372U;
    d = 0xa54ff53aU;
    e = 0x510e527fU;
    f = 0x9b05688cU;
    g = 0x1f83d9abU;
    h = 0x5be0cd19U;

    for (; i < 16; i++) m[i] = data[i];

    for (; i < 64; i++)
        m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];

    for (i = 0; i < 64; ++i) {
        t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
        t2 = EP0(a) + MAJ(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }

    midstate[0] = 0x6a09e667U + a;
    midstate[1] = 0xbb67ae85U + b;
    midstate[2] = 0x3c6ef372U + c;
    midstate[3] = 0xa54ff53aU + d;
    midstate[4] = 0x510e527fU + e;
    midstate[5] = 0x9b05688cU + f;
    midstate[6] = 0x1f83d9abU + g;
    midstate[7] = 0x5be0cd19U + h;
}

uint sha256d(uint midstate[], uint text1[]) {
    uint a,b,c,d,e,f,g,h,i,t1,t2,m[64];
    uint ee,eee,eeee;

    // Hash One

    a = midstate[0];
    b = midstate[1];
    c = midstate[2];
    d = midstate[3];
    e = midstate[4];
    f = midstate[5];
    g = midstate[6];
    h = midstate[7];

    for (i = 0;  i < 16; i++) m[i] = text1[i];

    for (; i < 64; i++)
        m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];

    for (i = 0; i < 64; i++) {
        t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
        t2 = EP0(a) + MAJ(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;

    }

    m[0] = midstate[0] + a;
    m[1] = midstate[1] + b;
    m[2] = midstate[2] + c;
    m[3] = midstate[3] + d;
    m[4] = midstate[4] + e;
    m[5] = midstate[5] + f;
    m[6] = midstate[6] + g;
    m[7] = midstate[7] + h;

    // Hash Two

    a = 0x6a09e667U;
    b = 0xbb67ae85U;
    c = 0x3c6ef372U;
    d = 0xa54ff53aU;
    e = 0x510e527fU;
    f = 0x9b05688cU;
    g = 0x1f83d9abU;
    h = 0x5be0cd19U;

    m[8]  = 0x80000000U;
    m[9]  = 0x00U;
    m[10] = 0x00U;
    m[11] = 0x00U;
    m[12] = 0x00U;
    m[13] = 0x00U;
    m[14] = 0x00U;
    m[15] = 0x100U;

    for (i = 16; i < 64; i++)
        m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];

    for (i = 0; i < 57; i++) {
        t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
        t2 = EP0(a) + MAJ(a,b,c);
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }

    eeee = d + h + EP1(e) + CH(e,f,g) + 0x78a5636fU + m[57];
    eee = c + g + EP1(eeee) + CH(eeee,e,f) + 0x84c87814U + m[58];
    ee = b + f + EP1(eee) + CH(eee,eeee,e) + 0x8cc70208U + m[59];
    h = a + e + EP1(ee) + CH(ee,eee,eeee) + 0x90befffaU + m[60];

    return 0x5be0cd19U + h;
}


#include
is only used for setting up the timer. If you work on linux or mac, just get rid of the lines related to the timer.
Jump to: