delete | Bitcointalksearch.org

DeathAndTaxes

donator

Activity: 1218

Merit: 1080

Gerald Davis

Quote from: ThiagoCMC on November 15, 2011, 03:39:37 AM

Guys,

Can this reaper miner be used to mine Litecoins?!
Does it work/compile on Linux?!

Thanks!
Thiago

Unlikely. SC doesn't use same algorithm for hashing as LTC does (Scrypt).

ThiagoCMC

legendary

Activity: 1204

Merit: 1000

฿itcoin: Currency of Resistance!

Guys,

Can this reaper miner be used to mine Litecoins?!
Does it work/compile on Linux?!

Thanks!
Thiago

bulanula

hero member

Activity: 518

Merit: 500

tacotime

legendary

Activity: 1484

Merit: 1005

bump, I'm wondering if anyone with an AMD card has tried AMD's bitwise rotation function, it's supposed to be much faster than coding for it in OCL:

Code:

//#pragma OPENCL EXTENSION cl_amd_media_ops : enable
//#define rot(x,y) amd_bitalign(x, x, (32-y))

edit: Appears someone uploaded an AMD optimized version of reaper.cl to pastebin, here it is:

Code:

typedef uint uint32_t;
typedef ulong uint64_t;
typedef uchar uint8_t;

typedef uint uint32;
typedef ulong uint64;

// Big-endian byte <-> integer helpers.
//
// U8TO32(p) / U8TO64(p) read 4 / 8 bytes starting at p, most significant
// byte first, and yield the value as an expression.
// U32TO8(p, v) / U64TO8(p, v) store v at p, most significant byte first.
//
// The store macros are wrapped in do { ... } while (0) so that each one
// expands to exactly one statement and stays correct under an unbraced
// if/else. Both arguments are still evaluated more than once, so do not
// pass expressions with side effects.
#define U8TO32(p) \
  (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
   ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
#define U8TO64(p) \
  (((uint64_t)U8TO32(p) << 32) | (uint64_t)U8TO32((p) + 4))
#define U32TO8(p, v) \
  do { \
    (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
    (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      ); \
  } while (0)
#define U64TO8(p, v) \
  do { \
    U32TO8((p),     (uint32_t)((v) >> 32)); \
    U32TO8((p) + 4, (uint32_t)((v)      )); \
  } while (0)

#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#pragma OPENCL EXTENSION cl_amd_media_ops : enable

/*typedef struct  { 
  uint64_t h[8];
  uint8_t buf[128];
} state;*/

// Message-word permutation table used by the G macro, stored flat as
// 16 rows of 16 indices (row r starts at offset 16*r).
// Rows 10..15 are copies of rows 0..5, so that search() can simply step
// its row offset by 16 for each of its 16 rounds.
// Presumably the BLAKE-512 sigma schedule — confirm against the reference.
__constant uint8_t sigma[256] = 
{
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 ,
    14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3 ,
    11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4 ,
     7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8 ,
     9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13 ,
     2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9 ,
    12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11 ,
    13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10 ,
     6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5 ,
    10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13 ,0 ,
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 ,
    14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3 ,
    11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4 ,
     7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8 ,
     9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13 ,
     2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9 
};

// Sixteen 64-bit constants that the G macro XORs with message words
// (selected through sigma). The first eight are the same values search()
// loads into v[8..15] before its second compression pass.
// Presumably the BLAKE-512 constant table — confirm against the reference.
__constant uint64_t cst[16] = 
{
  0x243F6A8885A308D3UL,0x13198A2E03707344UL,0xA4093822299F31D0UL,0x082EFA98EC4E6C89UL,
  0x452821E638D01377UL,0xBE5466CF34E90C6CUL,0xC0AC29B7C97C50DDUL,0x3F84D5B5B5470917UL,
  0x9216D5D98979FB1BUL,0xD1310BA698DFB5ACUL,0x2FFD72DBD01ADFB7UL,0xB8E1AFED6A267E96UL,
  0xBA7C9045F12C7F99UL,0x24A19947B3916CF7UL,0x0801F2E2858EFC16UL,0x636920D871574E69UL
};
  
// Per-round additive constants for Sha256_round(): round i adds K[i]
// together with message-schedule word i.
// Presumably the standard SHA-256 round constants — confirm against FIPS 180-4.
__constant uint K[64] = 
{ 
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

//uint rotl(uint x, uint y)
//{
//	return (x<<y) | (x>>(32-y));
//}

// 32-bit rotate-left of x by y bits, expressed through amd_bitalign from
// the cl_amd_media_ops extension: aligning the pair (x, x) right by 32-y
// bits. NOTE(review): amd_bitalign semantics are taken from the extension
// documentation, not from this file — confirm. x and y are not
// parenthesized, so pass simple expressions only.
#define rotl(x, y) amd_bitalign(x, x, (uint)(32 - y))


// Bitwise "choose": each result bit is y where x is 1, otherwise z.
//#define Ch(x, y, z) (z ^ (x & (y ^ z)))
// Bitwise "majority" of x, y and z.
#define Ma(x, y, z) ((y & z) | (x & (y | z)))

// Ch in terms of the OpenCL bitselect built-in (active definition).
#define Ch(x, y, z) bitselect(z,y,x)
// Ma can also be implemented in terms of bitselect
//#define Ma(y, z, x) bitselect(z^x,y,x)


// XOR of three left-rotations of x by a, b and c bits.
#define Tr(x,a,b,c) (rotl(x,a)^rotl(x,b)^rotl(x,c))

// Message-schedule expansion: computes work[x] from work[x-2], work[x-7],
// work[x-15] and work[x-16], stores it, and yields the stored value.
// Requires a local array named `work` in the expanding scope and x >= 16.
#define R(x) (work[x] = (rotl(work[x-2],15)^rotl(work[x-2],13)^((work[x-2])>>10)) + work[x-7] + (rotl(work[x-15],25)^rotl(work[x-15],14)^((work[x-15])>>3)) + work[x-16])
// One compression round. Only h and d are written; callers rotate the
// argument order from round to round instead of shuffling eight registers.
// x is the schedule word and K the round constant.
#define sharound(a,b,c,d,e,f,g,h,x,K) h+=Tr(e,7,21,26)+Ch(e,f,g)+K+x; d+=h; h+=Tr(a,10,19,30)+Ma(a,b,c);
// Same round with the constant already folded into x (used with table P).
#define sharound_s(a,b,c,d,e,f,g,h,x) h+=Tr(e,7,21,26)+Ch(e,f,g)+x; d+=h; h+=Tr(a,10,19,30)+Ma(a,b,c);

// Reverse the byte order of a 32-bit word (byte 0 <-> byte 3, byte 1 <-> byte 2).
uint EndianSwap(uint n)
{
	const uint lowest  =  n        & 0xFF;
	const uint low_mid = (n >>  8) & 0xFF;
	const uint hi_mid  = (n >> 16) & 0xFF;
	const uint highest = (n >> 24) & 0xFF;

	return (lowest << 24) | (low_mid << 16) | (hi_mid << 8) | highest;
}

// Absorb one 64-byte block into the eight-word state s (updated in place).
//
// s    - eight 32-bit state words; each is incremented by the matching
//        working register after the 64 rounds.
// data - 64 bytes of input, read as sixteen 32-bit words and byte-swapped
//        with EndianSwap before use.
//
// NOTE(review): data is reinterpreted as uint*, so it is assumed to be
// 4-byte aligned — confirm for every caller.
void Sha256_round(uint* s, unsigned char* data)
{
	// Message schedule: entries 0..15 come from the input, 16..63 are
	// filled lazily by the R() macro inside the round calls below.
	uint work[64];

	uint* udata = (uint*)data;
#pragma unroll
	for(uint i=0; i<16; ++i)
	{
		work[i] = EndianSwap(udata[i]);
	}

	uint A = s[0];
	uint B = s[1];
	uint C = s[2];
	uint D = s[3];
	uint E = s[4];
	uint F = s[5];
	uint G = s[6];
	uint H = s[7];
	// 64 rounds. The register names rotate by one position per round, so
	// no values are moved between registers; the pattern repeats every 8.
	// R(n) must be evaluated in increasing n because it reads earlier
	// work[] entries.
	sharound(A,B,C,D,E,F,G,H,work[0],K[0]);
	sharound(H,A,B,C,D,E,F,G,work[1],K[1]);
	sharound(G,H,A,B,C,D,E,F,work[2],K[2]);
	sharound(F,G,H,A,B,C,D,E,work[3],K[3]);
	sharound(E,F,G,H,A,B,C,D,work[4],K[4]);
	sharound(D,E,F,G,H,A,B,C,work[5],K[5]);
	sharound(C,D,E,F,G,H,A,B,work[6],K[6]);
	sharound(B,C,D,E,F,G,H,A,work[7],K[7]);
	sharound(A,B,C,D,E,F,G,H,work[8],K[8]);
	sharound(H,A,B,C,D,E,F,G,work[9],K[9]);
	sharound(G,H,A,B,C,D,E,F,work[10],K[10]);
	sharound(F,G,H,A,B,C,D,E,work[11],K[11]);
	sharound(E,F,G,H,A,B,C,D,work[12],K[12]);
	sharound(D,E,F,G,H,A,B,C,work[13],K[13]);
	sharound(C,D,E,F,G,H,A,B,work[14],K[14]);
	sharound(B,C,D,E,F,G,H,A,work[15],K[15]);
	sharound(A,B,C,D,E,F,G,H,R(16),K[16]);
	sharound(H,A,B,C,D,E,F,G,R(17),K[17]);
	sharound(G,H,A,B,C,D,E,F,R(18),K[18]);
	sharound(F,G,H,A,B,C,D,E,R(19),K[19]);
	sharound(E,F,G,H,A,B,C,D,R(20),K[20]);
	sharound(D,E,F,G,H,A,B,C,R(21),K[21]);
	sharound(C,D,E,F,G,H,A,B,R(22),K[22]);
	sharound(B,C,D,E,F,G,H,A,R(23),K[23]);
	sharound(A,B,C,D,E,F,G,H,R(24),K[24]);
	sharound(H,A,B,C,D,E,F,G,R(25),K[25]);
	sharound(G,H,A,B,C,D,E,F,R(26),K[26]);
	sharound(F,G,H,A,B,C,D,E,R(27),K[27]);
	sharound(E,F,G,H,A,B,C,D,R(28),K[28]);
	sharound(D,E,F,G,H,A,B,C,R(29),K[29]);
	sharound(C,D,E,F,G,H,A,B,R(30),K[30]);
	sharound(B,C,D,E,F,G,H,A,R(31),K[31]);
	sharound(A,B,C,D,E,F,G,H,R(32),K[32]);
	sharound(H,A,B,C,D,E,F,G,R(33),K[33]);
	sharound(G,H,A,B,C,D,E,F,R(34),K[34]);
	sharound(F,G,H,A,B,C,D,E,R(35),K[35]);
	sharound(E,F,G,H,A,B,C,D,R(36),K[36]);
	sharound(D,E,F,G,H,A,B,C,R(37),K[37]);
	sharound(C,D,E,F,G,H,A,B,R(38),K[38]);
	sharound(B,C,D,E,F,G,H,A,R(39),K[39]);
	sharound(A,B,C,D,E,F,G,H,R(40),K[40]);
	sharound(H,A,B,C,D,E,F,G,R(41),K[41]);
	sharound(G,H,A,B,C,D,E,F,R(42),K[42]);
	sharound(F,G,H,A,B,C,D,E,R(43),K[43]);
	sharound(E,F,G,H,A,B,C,D,R(44),K[44]);
	sharound(D,E,F,G,H,A,B,C,R(45),K[45]);
	sharound(C,D,E,F,G,H,A,B,R(46),K[46]);
	sharound(B,C,D,E,F,G,H,A,R(47),K[47]);
	sharound(A,B,C,D,E,F,G,H,R(48),K[48]);
	sharound(H,A,B,C,D,E,F,G,R(49),K[49]);
	sharound(G,H,A,B,C,D,E,F,R(50),K[50]);
	sharound(F,G,H,A,B,C,D,E,R(51),K[51]);
	sharound(E,F,G,H,A,B,C,D,R(52),K[52]);
	sharound(D,E,F,G,H,A,B,C,R(53),K[53]);
	sharound(C,D,E,F,G,H,A,B,R(54),K[54]);
	sharound(B,C,D,E,F,G,H,A,R(55),K[55]);
	sharound(A,B,C,D,E,F,G,H,R(56),K[56]);
	sharound(H,A,B,C,D,E,F,G,R(57),K[57]);
	sharound(G,H,A,B,C,D,E,F,R(58),K[58]);
	sharound(F,G,H,A,B,C,D,E,R(59),K[59]);
	sharound(E,F,G,H,A,B,C,D,R(60),K[60]);
	sharound(D,E,F,G,H,A,B,C,R(61),K[61]);
	sharound(C,D,E,F,G,H,A,B,R(62),K[62]);
	sharound(B,C,D,E,F,G,H,A,R(63),K[63]);

	// Feed-forward: add the working registers back into the state.
	s[0] += A;
	s[1] += B;
	s[2] +=	C;
	s[3] += D;
	s[4] += E;
	s[5] += F;
	s[6] += G;
	s[7] += H;
}

// Pre-combined round inputs for Sha256_round_padding(), which passes P[i]
// where Sha256_round() passes K[i] plus a schedule word.
// Visible relationships to K: P[1..14] equal K[1..14], P[0] is
// K[0] + 0x80000000 and P[15] is K[15] + 0x1000. That is consistent with
// K[i] + W[i] for a fixed final block (leading 0x80 byte, length field
// 0x1000 bits = 512 bytes) — presumably how the table was generated; confirm.
// Only entries 0..60 are read by Sha256_round_padding().
__constant uint P[64] =
{
	0xc28a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19c0174,
	0x649b69c1, 0xf9be478a, 0x0fe1edc6, 0x240ca60c, 0x4fe9346f, 0x4d1c84ab, 0x61b94f1e, 0xf6f993db,
	0xe8465162, 0xad13066f, 0xb0214c0d, 0x695a0283, 0xa0323379, 0x2bd376e9, 0xe1d0537c, 0x03a244a0,
	0xfc13a4a5, 0xfafda43e, 0x56bea8bb, 0x445ec9b6, 0x39907315, 0x8c0d4e9f, 0xc832dccc, 0xdaffb65b,
	0x1fed4f61, 0x2f646808, 0x1ff32294, 0x2634ccd7, 0xb0ebdefa, 0xd6fc592b, 0xa63c5c8f, 0xbe9fbab9,
	0x0158082c, 0x68969712, 0x51e1d7e1, 0x5cf12d0d, 0xc4be2155, 0x7d7c8a34, 0x611f2c60, 0x036324af,
	0xa4f08d87, 0x9e3e8435, 0x2c6dae30, 0x11921afc, 0xb76d720e, 0x245f3661, 0xc3a65ecb, 0x43b9e908
};

// Run the compression rounds for a fixed block whose per-round inputs are
// precomputed in table P, and update ONLY s[7].
//
// s - eight 32-bit state words; s[0..6] are read but left unchanged, and
//     s[7] is incremented by the final value of register H.
//
// Only rounds 0..60 are executed. Following the register rotation used
// here, rounds 61..63 would write C/G, B/F and A/E respectively and never
// touch H, so H is already final after round 60. The result is therefore
// not a complete digest; the caller in this file only inspects s[7].
void Sha256_round_padding(uint* s)
{
	uint A = s[0];
	uint B = s[1];
	uint C = s[2];
	uint D = s[3];
	uint E = s[4];
	uint F = s[5];
	uint G = s[6];
	uint H = s[7];
	// Register names rotate by one position per round (period 8).
	sharound_s(A,B,C,D,E,F,G,H,P[0]);
	sharound_s(H,A,B,C,D,E,F,G,P[1]);
	sharound_s(G,H,A,B,C,D,E,F,P[2]);
	sharound_s(F,G,H,A,B,C,D,E,P[3]);
	sharound_s(E,F,G,H,A,B,C,D,P[4]);
	sharound_s(D,E,F,G,H,A,B,C,P[5]);
	sharound_s(C,D,E,F,G,H,A,B,P[6]);
	sharound_s(B,C,D,E,F,G,H,A,P[7]);
	sharound_s(A,B,C,D,E,F,G,H,P[8]);
	sharound_s(H,A,B,C,D,E,F,G,P[9]);
	sharound_s(G,H,A,B,C,D,E,F,P[10]);
	sharound_s(F,G,H,A,B,C,D,E,P[11]);
	sharound_s(E,F,G,H,A,B,C,D,P[12]);
	sharound_s(D,E,F,G,H,A,B,C,P[13]);
	sharound_s(C,D,E,F,G,H,A,B,P[14]);
	sharound_s(B,C,D,E,F,G,H,A,P[15]);
	sharound_s(A,B,C,D,E,F,G,H,P[16]);
	sharound_s(H,A,B,C,D,E,F,G,P[17]);
	sharound_s(G,H,A,B,C,D,E,F,P[18]);
	sharound_s(F,G,H,A,B,C,D,E,P[19]);
	sharound_s(E,F,G,H,A,B,C,D,P[20]);
	sharound_s(D,E,F,G,H,A,B,C,P[21]);
	sharound_s(C,D,E,F,G,H,A,B,P[22]);
	sharound_s(B,C,D,E,F,G,H,A,P[23]);
	sharound_s(A,B,C,D,E,F,G,H,P[24]);
	sharound_s(H,A,B,C,D,E,F,G,P[25]);
	sharound_s(G,H,A,B,C,D,E,F,P[26]);
	sharound_s(F,G,H,A,B,C,D,E,P[27]);
	sharound_s(E,F,G,H,A,B,C,D,P[28]);
	sharound_s(D,E,F,G,H,A,B,C,P[29]);
	sharound_s(C,D,E,F,G,H,A,B,P[30]);
	sharound_s(B,C,D,E,F,G,H,A,P[31]);
	sharound_s(A,B,C,D,E,F,G,H,P[32]);
	sharound_s(H,A,B,C,D,E,F,G,P[33]);
	sharound_s(G,H,A,B,C,D,E,F,P[34]);
	sharound_s(F,G,H,A,B,C,D,E,P[35]);
	sharound_s(E,F,G,H,A,B,C,D,P[36]);
	sharound_s(D,E,F,G,H,A,B,C,P[37]);
	sharound_s(C,D,E,F,G,H,A,B,P[38]);
	sharound_s(B,C,D,E,F,G,H,A,P[39]);
	sharound_s(A,B,C,D,E,F,G,H,P[40]);
	sharound_s(H,A,B,C,D,E,F,G,P[41]);
	sharound_s(G,H,A,B,C,D,E,F,P[42]);
	sharound_s(F,G,H,A,B,C,D,E,P[43]);
	sharound_s(E,F,G,H,A,B,C,D,P[44]);
	sharound_s(D,E,F,G,H,A,B,C,P[45]);
	sharound_s(C,D,E,F,G,H,A,B,P[46]);
	sharound_s(B,C,D,E,F,G,H,A,P[47]);
	sharound_s(A,B,C,D,E,F,G,H,P[48]);
	sharound_s(H,A,B,C,D,E,F,G,P[49]);
	sharound_s(G,H,A,B,C,D,E,F,P[50]);
	sharound_s(F,G,H,A,B,C,D,E,P[51]);
	sharound_s(E,F,G,H,A,B,C,D,P[52]);
	sharound_s(D,E,F,G,H,A,B,C,P[53]);
	sharound_s(C,D,E,F,G,H,A,B,P[54]);
	sharound_s(B,C,D,E,F,G,H,A,P[55]);
	sharound_s(A,B,C,D,E,F,G,H,P[56]);
	sharound_s(H,A,B,C,D,E,F,G,P[57]);
	sharound_s(G,H,A,B,C,D,E,F,P[58]);
	sharound_s(F,G,H,A,B,C,D,E,P[59]);
	sharound_s(E,F,G,H,A,B,C,D,P[60]);

	s[7] += H;
}



// Rotate the 64-bit value x right by n bits (0 < n < 64).
// n is parenthesized so that an expression argument such as ROT(x, a+b)
// rotates by a+b instead of shifting by 64-a+b.
#define ROT(x,n) (((x)<<(64-(n)))|((x)>>(n)))

// One mixing step over four entries of the 64-bit working vector v.
//   m        - 16-word message array
//   a,b,c,d  - indices into v of the four words being mixed
//   e        - column offset inside the current sigma row (even, 0..14)
//   i        - offset of the current sigma row (16 * round number)
// Expands to eight statements that update v[a], v[b], v[c], v[d] in place
// using rotations by 32, 25, 16 and 11 bits. Requires `v`, `sigma` and
// `cst` to be in scope. It is a plain statement sequence, not a single
// statement, so do not use it as the body of an unbraced if/for.
#define G(m,a,b,c,d,e,i) \
  v[a] += (m[sigma[(i)+(e)]] ^ cst[sigma[(i)+(e)+1]]) + v[b]; \
  v[d] = ROT( v[d] ^ v[a],32); \
  v[c] += v[d]; \
  v[b] = ROT( v[b] ^ v[c],25); \
  v[a] += (m[sigma[(i)+(e)+1]] ^ cst[sigma[(i)+(e)]])+v[b]; \
  v[d] = ROT( v[d] ^ v[a],16); \
  v[c] += v[d]; \
  v[b] = ROT( v[b] ^ v[c],11);

//assumes input is 512 bytes
// Proof-of-work search kernel: each work-item tests one nonce.
//
// in_param  - block header; only its first 128 bytes are read.
// out_param - result slots; on a hit the work-item's global id is written
//             to out_param[nonce & 0xFF].
// pad       - read-only byte pad. Offsets up to 0x3FFFFF + 3 are read, so
//             the buffer must extend at least 3 bytes past 0x3FFFFF.
//
// Private buffer layout inside in[512]:
//   in[0..127]    header copy, nonce stored as a uint at byte offset 108
//   in[128..191]  work2: the eight 64-bit hash words h[0..7], big-endian
//   in[192..511]  work3: 320-byte scratch area mixed with the pad
//
// Steps: (1) hash the 128-byte header with two compression passes of the
// G-based function (presumably BLAKE-512 — confirm), (2) seed and churn the
// scratch area using the pad, (3) run the SHA-256-style rounds over all
// 512 bytes plus the fixed padding block and test the last state word.
__kernel void search(__global uint8_t* in_param, __global uint* out_param, __global uint8_t* pad) 
{
	uchar in[512];
#pragma unroll
	for(uint i=0; i<128; ++i)
		in[i] = in_param[i];

	// The global id doubles as the nonce under test.
	uint nonce = get_global_id(0);
	
	*(uint*)(in+108) = nonce;

	uint64_t h[8];
	h[0]=0x6A09E667F3BCC908UL;
	h[1]=0xBB67AE8584CAA73BUL;
	h[2]=0x3C6EF372FE94F82BUL;
	h[3]=0xA54FF53A5F1D36F1UL;
	h[4]=0x510E527FADE682D1UL;
	h[5]=0x9B05688C2B3E6C1FUL;
	h[6]=0x1F83D9ABFB41BD6BUL;
	h[7]=0x5BE0CD19137E2179UL;

	// ---- first compression pass: the 128 header bytes ----
	uint64_t v[16];
#pragma unroll
	for(uint i=0; i< 8;++i)  v[i] = h[i];
	v[ 8] = 0x243F6A8885A308D3UL;
	v[ 9] = 0x13198A2E03707344UL;
	v[10] = 0xA4093822299F31D0UL;
	v[11] = 0x082EFA98EC4E6C89UL;
	// v[12] and v[13] are cst[4] and cst[5] with 0x400 XORed in
	// (presumably the 1024-bit message counter — confirm).
	v[12] = 0x452821E638D01777UL;
	v[13] = 0xBE5466CF34E9086CUL;
	v[14] = 0xC0AC29B7C97C50DDUL;
	v[15] = 0x3F84D5B5B5470917UL;

	{
		uint64_t m[16];
#pragma unroll
		for(uint i=0; i<16;++i)  m[i] = U8TO64(in + i*8);
		// 16 rounds; i is the offset of the sigma row used by the round
		// (0, 16, ..., 240). Each round mixes the four columns and then
		// the four diagonals of v.
#pragma unroll
		for(uint i=0; i<256; i+=16)
		{
			G( m, 0, 4, 8,12, 0, i); G( m, 1, 5, 9,13, 2, i); G( m, 2, 6,10,14, 4, i); G( m, 3, 7,11,15, 6, i);
			G( m, 3, 4, 9,14,14, i); G( m, 2, 7, 8,13,12, i); G( m, 0, 5,10,15, 8, i); G( m, 1, 6,11,12,10, i);
		}
	} 

#pragma unroll
	for(uint i=0; i<16;++i)  h[i&7] ^= v[i]; 

	// ---- second compression pass: constant block m2, unmodified cst values ----
#pragma unroll
	for(uint i=0; i< 8;++i)  v[i] = h[i];
	v[8] = 0x243F6A8885A308D3UL;
	v[9] = 0x13198A2E03707344UL;
	v[10] = 0xA4093822299F31D0UL;
	v[11] = 0x082EFA98EC4E6C89UL;
	v[12] = 0x452821E638D01377UL;
	v[13] = 0xBE5466CF34E90C6CUL;
	v[14] = 0xC0AC29B7C97C50DDUL;
	v[15] = 0x3F84D5B5B5470917UL;

	{
		// Fixed message block: top bit of word 0 set, word 13 = 1, word 15 = 0x400.
		uint64_t m2[16] = {1UL << 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0x400};
#pragma unroll
		for(uint i=0; i<256; i+=16)
		{
			G(m2, 0, 4, 8,12, 0, i); G(m2, 1, 5, 9,13, 2, i); G(m2, 2, 6,10,14, 4, i); G(m2, 3, 7,11,15, 6, i);
			G(m2, 3, 4, 9,14,14, i); G(m2, 2, 7, 8,13,12, i); G(m2, 0, 5,10,15, 8, i); G(m2, 1, 6,11,12,10, i);
		}
	} 

#pragma unroll
	for(uint i=0; i<16;++i)  h[i&7] ^= v[i];

	// Store the eight hash words big-endian right after the header.
	uint8_t* work2 = in+128;

	U64TO8( work2 + 0, h[0]);
	U64TO8( work2 + 8, h[1]);
	U64TO8( work2 +16, h[2]);
	U64TO8( work2 +24, h[3]);
	U64TO8( work2 +32, h[4]);
	U64TO8( work2 +40, h[5]);
	U64TO8( work2 +48, h[6]);
	U64TO8( work2 +56, h[7]);
	
	// ---- seed the 320-byte scratch area ----
	uint8_t* work3 = work2+64;
//a = x-1, b = x, c = x&63
// Finalizes work3[a] by XORing in a hash byte, then picks work3[b] from the
// header (high bit of work3[a] set) or from the hash bytes (high bit clear).
#define WORKINIT(a,b,c)   work3[a] ^= work2[c]; \
		if(work3[a]&0x80) work3[b]=in[(b+work3[a])&0x7F]; \
		else              work3[b]=work2[(b+work3[a])&0x3F];

	
	work3[0] = work2[15];
	WORKINIT(0,1,1);
	WORKINIT(1,2,2);
	WORKINIT(2,3,3);
	// Both loops are hand-unrolled by four: x advances once in the loop
	// header and three more times inside the body.
#pragma unroll
	for(int x=4;x<64;++x)
	{
		WORKINIT(x-1,x,x);
		++x;
		WORKINIT(x-1,x,x);
		++x;
		WORKINIT(x-1,x,x);
		++x;
		WORKINIT(x-1,x,x);
	}
#pragma unroll
	for(int x=64;x<320;++x)
	{
		WORKINIT(x-1,x,x&63);
		++x;
		WORKINIT(x-1,x,x&63);
		++x;
		WORKINIT(x-1,x,x&63);
		++x;
		WORKINIT(x-1,x,x&63);
	}

	// Little-endian 32-bit read from the pad at a byte offset.
	#define READ_PAD32_R(offset) ((uint)pad[(offset)] | (((uint)pad[(offset)+1])<<8) | (((uint)pad[(offset)+2])<<16) | (((uint)pad[(offset)+3])<<24))
	
	// 22-bit pad offset assembled from three scratch bytes.
	#define READ_W32(offset) ((uint)work3[offset] + (((uint)work3[(offset)+1])<<8) + (((uint)work3[(offset)+2]&0x3F)<<16))

	// Assemble the 64-bit running value from scratch bytes 310..317
	// (piecewise, so that no 8-byte access is needed at offset 310).
	ushort* shortptr = (ushort*)(work3+310);
	uint64 qCount = shortptr[0];
	qCount |= ((uint64)shortptr[3])<<48;
	uint* uintptr = (uint*)(work3+312);
	qCount |= ((uint64)*uintptr)<<16;

	// ---- pad-driven mixing loop: 512 plus up to 31 extra iterations ----
	uint nExtra=(pad[(qCount+work3[300])&0x3FFFFF]>>3)+512;
	// NOTE(review): the loop condition was lost in the forum paste;
	// "x<nExtra" is reconstructed from nExtra already including the +512
	// base count — confirm against the upstream reaper.cl.
	// x can additionally be bumped twice inside the body.
#pragma unroll
	for(uint x=1;x<nExtra;++x)
	{
		uint res = 0;
		qCount += READ_PAD32_R((qCount&0x3FFFFF));
		work3[qCount%320] += (qCount&0x87878700) ? 1 : 0;

		qCount-= pad[(qCount+work3[qCount%160])&0x3FFFFF];
		
		if(qCount&0x80000000)   { qCount+= pad[qCount&0xFFFF]; }
		else                    { res = qCount&0x20FAFB; qCount+= READ_PAD32_R(res); }

		res = (qCount+work3[qCount%160]) & 0x3FFFFF;
		qCount+= READ_PAD32_R(res);
		if(qCount&0xF0000000)        ++work3[qCount%320];

		res = READ_W32(qCount&0xFF);
		qCount+= READ_PAD32_R(res);
		work3[x%320]=work2[x&63]^(qCount&0xFF);

		res = ((qCount>>32)+work3[x%200]) & 0x3FFFFF;
		qCount+= READ_PAD32_R(res);
		
		// XOR the 32 bits qCount>>24 into the scratch at byte offset
		// qCount%316, done as two aligned word updates: 316 is a
		// multiple of 4, so subtracting OFFS yields an aligned address.
		#define OFFS (qCount&3)
		uint* ram = (uint *)(work3+((qCount%316)-OFFS));
		uint val = amd_bytealign((uint32)(qCount>>24), (uint32)(qCount>>24), (uint32)(4-OFFS));
		ram[0] ^= val&(0xFFFFFFFFL<<(OFFS<<3));
		ram[1] ^= val&(0xFFFFFFFFL>>(32-(OFFS<<3)));

		x += ((qCount&7)==3);

		qCount-= pad[x*x];
		if((qCount&0x07)==0x01) ++x;
	}

	// ---- final hash over the whole 512-byte buffer ----
	uint s[8]= {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};

	Sha256_round(s, in);
	Sha256_round(s, in+64);
	Sha256_round(s, in+128);
	Sha256_round(s, in+192);
	Sha256_round(s, in+256);
	Sha256_round(s, in+320);
	Sha256_round(s, in+384);
	Sha256_round(s, in+448);
	// Only s[7] is valid after this call; it is the only word tested below.
	Sha256_round_padding(s);
	
	// Candidate found when the masked bits of the last state word are zero.
	if ((s[7] & 0x80FFFF) == 0)
	{
		out_param[nonce&0xFF] = get_global_id(0);
	}
}

tacotime

legendary

Activity: 1484

Merit: 1005

I benched it earlier and it's about 3% faster, you should see some improvement.

Ten98

sr. member

Activity: 1008

Merit: 250

Tacotime I've tried your optimised code and it runs no faster.

OpenCL automatically sends multiple workloads to the GPU compute units to work on in parallel, you don't have to do it yourself in the code, so I don't think your theory about making the code being more parallel to speed things up will hold true.

makomk

hero member

Activity: 686

Merit: 564

Quote from: tacotime on October 28, 2011, 12:19:40 PM

- The if/else statements can be avoided but have virtually no effect on the speed of GPU (while the CPU code seems to paradoxically benefit a little). This goes against everything CH said about if/else statements being difficult for the GPU.

Well yeah, it wouldn't. Certain kinds of if-else statements cause poor performance on GPUs but I don't think the kind of branching he's using will, whereas if you don't have a clever compiler you will pay a penalty for it on CPUs.

tacotime

legendary

Activity: 1484

Merit: 1005

Okay, I've been playing around with the code for a few days now.

- The if/else statements can be avoided but have virtually no effect on the speed of GPU (while the CPU code seems to paradoxically benefit a little). This goes against everything CH said about if/else statements being difficult for the GPU.
- The main problem with reaper's implementation is that it does not allow per-compute-unit parallelization. The code is run on the OCL kernel, but not in parallel; ideally the search() function that is executed should be handed 32 or more data sets to work on and then execute all data sets by a for loop, and then these should be synced with a barrier and output to the global memory. The current code hands one data set to one compute unit and likely takes a very hard hit in terms of parallelization. With all the coprocessors working on the data set, a speed-up of an order of magnitude for GPUs should be possible (several megahashes per second).

Whoever has time to do this and cares, have a look at the OCL examples from nVidia and how they parallelize much more effectively than the current reaper code: http://developer.nvidia.com/opencl-sdk-code-samples

Intention

full member

Activity: 128

Merit: 100

Quote from: tacotime on October 26, 2011, 10:48:23 PM

Quote from: Intention on October 26, 2011, 10:04:23 PM

Attempted to try your code taco but I get a build error from /usr/lib/ld about -lOpenCL missing.

You need to install the opencl library

I attempted to however as a Linux n00b everything is pretty open ended. I believe I installed the ATI Stream SDK which came with the OpenCL stuff even though the computer itself has an older Nvidia card I was just hoping to compile it on there for my Windows PC that has radeon cards.

tacotime

legendary

Activity: 1484

Merit: 1005

Quote from: Intention on October 26, 2011, 10:04:23 PM

Attempted to try your code taco but I get a build error from /usr/lib/ld about -lOpenCL missing.

You need to install the opencl library

Intention

full member

Activity: 128

Merit: 100

Attempted to try your code taco but I get a build error from /usr/lib/ld about -lOpenCL missing.

tacotime

legendary

Activity: 1484

Merit: 1005

Here is the more optimized RSHash.cpp

Code:

#include "RSHash.h"
#include "Blake512.h"
#include "SHA256.h"
#include 
#include 
using std::cout;
using std::endl;

#define PHI 0x9e3779b9
#define BLOCKHASH_1_PADSIZE (1024*1024*4)

typedef unsigned int uint32;
typedef unsigned long long int uint64;

static uint32 BlockHash_1_Q[4096],BlockHash_1_c,BlockHash_1_i;
unsigned char *BlockHash_1_MemoryPAD8;
uint32 *BlockHash_1_MemoryPAD32;

// Next 32-bit value of the complementary-multiply-with-carry generator
// whose state is BlockHash_1_Q (4096-word lag table), BlockHash_1_c (carry)
// and BlockHash_1_i (current index). Advances all three and returns the
// word just stored in the table.
uint32 BlockHash_1_rand(void)
{
    uint32 x, r = 0xfffffffe;
    uint64 t, a = 18782LL;
    BlockHash_1_i = (BlockHash_1_i + 1) & 4095;
    t = a * BlockHash_1_Q[BlockHash_1_i] + BlockHash_1_c;
    BlockHash_1_c = (t >> 32);
    x = (t + BlockHash_1_c)&0xFFFFFFFF;
    // Carry correction: x and the carry must BOTH be incremented. The
    // short-circuit form "x++ && BlockHash_1_c++" skipped the carry
    // increment whenever x was 0, which makes the generated pad diverge
    // from the if-based reference form of the generator.
    if (x < BlockHash_1_c)
    {
        ++x;
        ++BlockHash_1_c;
    }
    return (BlockHash_1_Q[BlockHash_1_i] = r - x);
}

#include 

void BlockHash_Init()
{
    static unsigned char SomeArrogantText1[]="Back when I was born the world was different. As a kid I could run around the streets, build things in the forest, go to the beach and generally live a care free life. Sure I had video games and played them a fair amount but they didn't get in the way of living an adventurous life. The games back then were different too. They didn't require 40 hours of your life to finish. Oh the good old days, will you ever come back?";
    static unsigned char SomeArrogantText2[]="Why do most humans not understand their shortcomings? The funny thing with the human brain is it makes everyone arrogant at their core. Sure some may fight it more than others but in every brain there is something telling them, HEY YOU ARE THE MOST IMPORTANT PERSON IN THE WORLD. THE CENTER OF THE UNIVERSE. But we can't all be that, can we? Well perhaps we can, introducing GODria, take 2 pills of this daily and you can be like RealSolid, lord of the universe.";
    static unsigned char SomeArrogantText3[]="What's up with kids like artforz that think it's good to attack other's work? He spent a year in the bitcoin scene riding on the fact he took some other guys SHA256 opencl code and made a miner out of it. Bravo artforz, meanwhile all the false praise goes to his head and he thinks he actually is a programmer. Real programmers innovate and create new work, they win through being better coders with better ideas. You're not real artforz, and I hear you like furries? What's up with that? You shouldn't go on IRC when you're drunk, people remember the weird stuff.";
    BlockHash_1_MemoryPAD8 = new unsigned char[BLOCKHASH_1_PADSIZE+8];  //need the +8 for memory overwrites
    BlockHash_1_MemoryPAD32 = (uint32*)BlockHash_1_MemoryPAD8;

    BlockHash_1_Q[0] = 0x6970F271;
    BlockHash_1_Q[1] = 0x6970F271 + PHI;
    BlockHash_1_Q[2] = 0x6970F271 + PHI + PHI;
    for (int i = 3; i < 4096; ++i)  BlockHash_1_Q[i] = BlockHash_1_Q[i - 3] ^ BlockHash_1_Q[i - 2] ^ PHI ^ i;
    BlockHash_1_c=362436;
    BlockHash_1_i=4095;

    int count1=0,count2=0,count3=0;
    for(int x=0;x<(BLOCKHASH_1_PADSIZE/4)+2;++x)  BlockHash_1_MemoryPAD32[x] = BlockHash_1_rand();
    for(int x=0;x    {
        switch(BlockHash_1_MemoryPAD8[x]&3)
        {
            case 0: BlockHash_1_MemoryPAD8[x] ^= SomeArrogantText1[count1++]; if(count1>=sizeof(SomeArrogantText1)) count1=0; break;
            case 1: BlockHash_1_MemoryPAD8[x] ^= SomeArrogantText2[count2++]; if(count2>=sizeof(SomeArrogantText2)) count2=0; break;
            case 2: BlockHash_1_MemoryPAD8[x] ^= SomeArrogantText3[count3++]; if(count3>=sizeof(SomeArrogantText3)) count3=0; break;
            case 3: BlockHash_1_MemoryPAD8[x] ^= 0xAA; break;
        }
    }
}

void BlockHash_DeInit()
{
    delete[] BlockHash_1_MemoryPAD8;
}

const uint32 PAD_MASK = BLOCKHASH_1_PADSIZE-1;
typedef unsigned char uchar;

// Compute the block hash of a 512-byte work buffer.
//
// p512bytes  - in/out buffer of 512 bytes. Bytes 0..127 hold the block
//              header on entry; bytes 128..511 are overwritten.
// final_hash - receives the result of Sha256() over the whole buffer.
// Returns true unconditionally.
//
// Requires BlockHash_Init() to have built the memory pad beforehand.
bool BlockHash_1(unsigned char *p512bytes, unsigned char* final_hash)
{
    //0->127   is the block header      (128)
    //128->191 is blake(blockheader)    (64)
    //192->511 is scratch work area     (320)

    unsigned char *work1 = p512bytes;
    unsigned char *work2=work1+128;
    unsigned char *work3=work1+192;

    blake512_hash(work2,work1);

    //setup the 320 scratch with some base values
    // Each step finalizes work3[x-1] by XORing in a hash byte, then picks
    // work3[x] from the hash (value < 0x80) or from the header (otherwise).
    work3[0] = work2[15];
    for(int x=1;x<320;++x)
    {
        work3[x-1] ^= work2[x&63];
        (work3[x-1]<0x80) ? work3[x]=work2[(x+work3[x-1])&63] : work3[x]=work1[(x+work3[x-1])&127];
    }
	
    // Pad reads; every offset is wrapped into the pad with PAD_MASK.
    #define READ_PAD8(offset) BlockHash_1_MemoryPAD8[(offset)&PAD_MASK]
    #define READ_PAD32(offset) (*((uint32*)&BlockHash_1_MemoryPAD8[(offset)&PAD_MASK]))

    // NOTE(review): the uint64/uint32 accesses below go through cast byte
    // pointers at arbitrary offsets, so they rely on the platform
    // tolerating unaligned loads/stores and on host byte order — confirm.
	uint64 qCount = *((uint64*)&work3[310]);
    int nExtra=READ_PAD8(qCount+work3[300])>>3;
    // 512 iterations plus up to 31 extra; x can also be bumped twice in the body.
    for(int x=1;x<512+nExtra;++x)
    {
        qCount+= READ_PAD32( qCount );
        // "cond && stmt" is used throughout as a compact "if (cond) stmt".
        qCount&0x87878700 && work3[qCount%320]++;

        qCount-= READ_PAD8( qCount+work3[qCount%160] );
        qCount&0x80000000 ? qCount+= READ_PAD8( qCount&0x8080FFFF ) : qCount+= READ_PAD32( qCount&0x7F60FAFB );

        qCount+= READ_PAD32( qCount+work3[qCount%160] );
        qCount&0xF0000000 && work3[qCount%320]++;

        qCount+= READ_PAD32( *((uint32*)&work3[qCount&0xFF]) );
        work3[x%320]=work2[x&63]^uchar(qCount);

        qCount+= READ_PAD32( (qCount>>32)+work3[x%200] );
        // XOR 32 bits of qCount into the scratch; offsets up to 315 keep
        // the 4-byte write inside the 320-byte area.
        *((uint32*)&work3[qCount%316]) ^= (qCount>>24)&0xFFFFFFFF;
        ((qCount&0x07)==0x03) && x++;
        qCount-= READ_PAD8( (x*x) );
        ((qCount&0x07)==0x01) && x++;
     }

     // Final digest over the full 512-byte buffer (work1 == p512bytes).
     Sha256(work1, final_hash);
     return true;
}

I eliminated all of the cool if/else statements coinhunter was for some reason so proud of before, I'm not sure why exactly. It appears to get +3% or so performance for me.

Quote

Wait. Isn't he a $150 an hour, unemployed coding genius who spent 20,000 hours on this? I think you are just misunderstanding the code.

The multiGPU coding appears fucked, actually I seem to get lower hash rates using two GPUs instead of putting "device 0" in the config file and only using the first one. Huh

If you put "device 1" in the reaper config though, SEGFAULT!

Edit: device 1 works on another motherboard, I'm wondering if maybe something is weird with my operating system on this computer... too tired, going home.

tacotime

legendary

Activity: 1484

Merit: 1005

Bumping again, having a lot of fun here... optimized Coinhunter's code for mining and now I'm pulling 150 kh/s on a single GTX 570. I'll publish it later when I'm done with more optimizations. The OpenCL multidevice coding is totally fucked by the use of pthread and multigpu setups will take about a 70% performance hit for the second GPU. This cannot be fixed by running separate instances, because there are segfaults if you set it to only use a 2nd or 3rd etc device.

tacotime

legendary

Activity: 1484

Merit: 1005

Okay, this fix is better and just restarts the miner if it dies. simply run this script in bash:

Code:

# Keep restarting the miner until it exits successfully (status 0).
# After every failed run, report the exit status on stderr and wait one
# second before the next attempt.
until ./run_reaper.sh; do
    # $? here is still the exit status of the run_reaper.sh call above.
    echo "Server 'run_reaper.sh' crashed with exit code $?.  Respawning.." >&2
    sleep 1
done

where run_reaper.sh is the script to run reaper

tacotime

legendary

Activity: 1484

Merit: 1005

reaper 0.7's error handling is befuckered at best... to keep it from crashing with libcurl, change:

Code:

CURLcode code = curl_easy_perform(curl);
	if(code != CURLE_OK)
	{
		if (code == CURLE_COULDNT_CONNECT)
		{
			cout << "Could not connect. Server down?" << endl;
		}
		else
		{
			cout << "Error " << code << " submitting work. See http://curl.haxx.se/libcurl/c/libcurl-errors.html for error code explanations." << endl;
		}
	}
	curl_slist_free_all(headerlist);

So that it loops and sleeps if the data to return is null

edit: quickfix

Code:

        CURLcode code = curl_easy_perform(curl);
        while (code != CURLE_OK){
            if (code == CURLE_COULDNT_CONNECT)
            {
                   cout << "Could not connect. Server down?" << endl;
                   sleep(5);
            }
            else
            {
                   cout << "Error " << code << " submitting work. See http://curl.haxx.se/libcurl/c/libcurl-errors.html for error code explanations." << endl;
                   sleep(5);
            }
            code = curl_easy_perform(curl);
        }

Raoul Duke

legendary

Activity: 1386

Merit: 1002

Quote from: ?? on ??

Ha ha if you only knew who I really was...

Is that you, Cheetah?

BitterTea

sr. member

Activity: 294

Merit: 252

Quote from: Raoul Duke on October 24, 2011, 10:03:53 PM

I told you before and I'll tell you again:

Chupa-me a pila e chama-me tarzan...

If you've got to obfuscate what you say so that others can't understand it, why say it at all?

Translated:

Quote

Suck my dick and call me tarzan

Raoul Duke

legendary

Activity: 1386

Merit: 1002

I told you before and I'll tell you again:

Chupa-me a pila e chama-me tarzan...

simonk83

hero member

Activity: 798

Merit: 1000

Quote from: Raoul Duke on October 24, 2011, 09:35:58 PM

That showed everybody who the retarded is here... and it ain't me

Ah, the irony is delicious.

Raoul Duke

legendary

Activity: 1386

Merit: 1002

Quote from: BitterTea on October 24, 2011, 09:42:33 PM

Quote from: Raoul Duke on October 24, 2011, 09:41:25 PM

here is the thread and a moderator confirming you said it.

https://bitcointalksearch.org/topic/bitcoinexpress-message-to-me-48245

So, who's the retard now?

You are a moron... btw, Tor is for pedo's like you...

I'm Satoshi. Any mod can confirm that I, BitterTea did in fact say this.

ask lolclown...

but i follow you, we all know that BCX is a dumbass full of shit that never tells the truth, so why believe him...

It's fun to pwn retards(BCX and BitterTea) on their own game...

Topic: delete (Read 8884 times)