Randomly building a CPU miner in pure C.
I just wanted to make it as compact as humanly possible, and maybe fast... maybe...
It uses a bunch of optimizations like midstate precalc and early 2nd round loop termination.
in Windows and a Core 2 Duo 2.33 GHz brings 850 Kh/s (single thread).
I wanted to make use of some sort of SSE/SIMD though, but myself I don't think I'd do it right (any help is appreciated).
#include
#include
#define uchar unsigned char
#define uint unsigned int
uchar htochar(uchar *ptr) {
uchar value = 0;
char ch = *ptr;
if (ch >= '0' && ch <= '9')
value = (value << 4) + (ch - '0');
else
value = (value << 4) + (ch - 'a' + 10);
ch = *(++ptr);
if (ch >= '0' && ch <= '9')
value = (value << 4) + (ch - '0');
else
value = (value << 4) + (ch - 'a' + 10);
return value;
}
#define byte_swap4(val) \
(((val & 0xff) << 24) | \
((val & 0xff00) << 8) | \
((val & 0xff0000) >> 8) | \
((val & 0xff000000) >> 24))
#define ROTLEFT(a,b) ((a << b) | (a >> (32-b)))
#define ROTRIGHT(a,b) ((a >> b) | (a << (32-b)))
#define CH(x,y,z) ((x & y) ^ (~x & z))
#define MAJ(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ (x >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ (x >> 10))
static const uint k[64] = {
0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};
void sha256_MS(uint data[], uint midstate[]) {
uint a,b,c,d,e,f,g,h,i=0,t1,t2,m[64];
a = 0x6a09e667U;
b = 0xbb67ae85U;
c = 0x3c6ef372U;
d = 0xa54ff53aU;
e = 0x510e527fU;
f = 0x9b05688cU;
g = 0x1f83d9abU;
h = 0x5be0cd19U;
for (; i < 16; i++) m[i] = data[i];
for (; i < 64; i++)
m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];
for (i = 0; i < 64; ++i) {
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
t2 = EP0(a) + MAJ(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
midstate[0] = 0x6a09e667U + a;
midstate[1] = 0xbb67ae85U + b;
midstate[2] = 0x3c6ef372U + c;
midstate[3] = 0xa54ff53aU + d;
midstate[4] = 0x510e527fU + e;
midstate[5] = 0x9b05688cU + f;
midstate[6] = 0x1f83d9abU + g;
midstate[7] = 0x5be0cd19U + h;
}
uint sha256d(uint midstate[], uint text1[]) {
uint a,b,c,d,e,f,g,h,i,t1,t2,m[64];
uint ee,eee,eeee;
// Hash One
a = midstate[0];
b = midstate[1];
c = midstate[2];
d = midstate[3];
e = midstate[4];
f = midstate[5];
g = midstate[6];
h = midstate[7];
for (i = 0; i < 16; i++) m[i] = text1[i];
for (; i < 64; i++)
m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];
for (i = 0; i < 64; i++) {
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
t2 = EP0(a) + MAJ(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
m[0] = midstate[0] + a;
m[1] = midstate[1] + b;
m[2] = midstate[2] + c;
m[3] = midstate[3] + d;
m[4] = midstate[4] + e;
m[5] = midstate[5] + f;
m[6] = midstate[6] + g;
m[7] = midstate[7] + h;
// Hash Two
a = 0x6a09e667U;
b = 0xbb67ae85U;
c = 0x3c6ef372U;
d = 0xa54ff53aU;
e = 0x510e527fU;
f = 0x9b05688cU;
g = 0x1f83d9abU;
h = 0x5be0cd19U;
m[8] = 0x80000000U;
m[9] = 0x00U;
m[10] = 0x00U;
m[11] = 0x00U;
m[12] = 0x00U;
m[13] = 0x00U;
m[14] = 0x00U;
m[15] = 0x100U;
for (i = 16; i < 64; i++)
m[i] = SIG1(m[i-2]) + m[i-7] + SIG0(m[i-15]) + m[i-16];
for (i = 0; i < 57; i++) {
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
t2 = EP0(a) + MAJ(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
eeee = d + h + EP1(e) + CH(e,f,g) + 0x78a5636fU + m[57];
eee = c + g + EP1(eeee) + CH(eeee,e,f) + 0x84c87814U + m[58];
ee = b + f + EP1(eee) + CH(eee,eeee,e) + 0x8cc70208U + m[59];
h = a + e + EP1(ee) + CH(ee,eee,eeee) + 0x90befffaU + m[60];
return 0x5be0cd19U + h;
}