OK Thanks for testing. I give up for the moment. I run out of ideas.
I let the volatile.
Hope I will manage to reproduce this.
It was the Bitcointalk forum that inspired us to create Bitcointalksearch.org - Bitcointalk is an excellent site that should be the default page for anybody dealing in cryptocurrency, since it is a virtual gold-mine of data. However, our experience and user feedback led us create our site; Bitcointalk's search is slow, and difficult to get the results you need, because you need to log in first to find anything useful - furthermore, there are rate limiters for their search functionality.
The aim of our project is to create a faster website that yields more results and faster without having to create an account and eliminate the need to log in - your personal data, therefore, will never be in jeopardy since we are not asking for any of your data and you don't need to provide them to use our site with all of its capabilities.
We created this website with the sole purpose of users being able to search quickly and efficiently in the field of cryptocurrency so they will have access to the latest and most accurate information and thereby assisting the crypto-community at large.
//_ModMult(_p2, _s, _s); // _p = pow2(s)
_ModSqr(_p2, _s);
//_ModMult(py, _s);
_ModSqr(py, _s);
__device__ void _ModSqr(uint64_t *rp, const uint64_t *up) {
uint64_t u0 = up[0];
uint64_t u1 = up[1];
uint64_t u2 = up[2];
uint64_t u3 = up[3];
uint64_t u10, u11;
uint64_t r0 = 0;
uint64_t r1 = 0;
uint64_t r3 = 0;
uint64_t r4 = 0;
uint64_t t1 = 0;
uint64_t t2 = 0;
uint64_t s0, s1, s2, s3, s4, s5, s6, s7;
uint64_t z1, z2, z3, z4, z5, z6, z7, z8;
z1 = z2 = 0;
//k=0
UMULLO(s0, u0, u0);
UMULHI(r1, u0, u0);
//k=1
UMULLO(r3, u0, u1);
UMULHI(r4, u0, u1);
UADDO1(r4, r4);
UADDC1(u0, u0);
UADDC1(r4, u1);
UADDC1(u0, 0x0ULL);
UADD1(r3, 0x0ULL);
s1 = r3;
//k=2
UMULLO(r0, u0, u2);
UMULHI(r1, u0, u2);
UADDO1(r0, r0);
UADDC1(r1, r1);
UADD1(t2, 0x0ULL);
UMULLO(u10, u1, u1);
UMULHI(u11, u1, u1);
UADDO1(r0, u10);
UADDC1(r1, u11);
UADD1(t2, 0x0ULL);
UADDO1(r0, r4);
UADDC1(r1, t1);
UADD1(t2, 0x0ULL);
s2 = r0;
t1 = 0;
//k=3
UMULLO(r3, u0, u3);
UMULHI(r4, u0, u3);
UMULLO(u10, u1, u2);
UMULHI(u11, u1, u2);
UADDO1(r3, u10);
UADDC1(r4, u11);
UADD1(t1, 0x0ULL);
t1 += t1;
UADDO1(r3, r3);
UADDC1(r4, r4);
UADD1(t1, 0x0ULL);
UADDO1(r3, r1);
UADDC1(r4, t2);
UADD1(t1, 0x0ULL);
s3 = r3;
t2 = 0;
//k=4
UMULLO(r0, u1, u3);
UMULHI(r1, u1, u3);
UADDO1(r0, r0);
UADDC1(r1, r1);
UADD1(t2, 0x0ULL);
UMULLO(u10, u2, u2);
UMULHI(u11, u2, u2);
UADDO1(r0, u10);
UADDC1(r1, u11);
UADD1(t2, 0x0ULL);
UADDO1(r0, r4);
UADDC1(r1, t1);
UADD1(t2, 0x0ULL);
s4 = r0;
t1 = 0;
//k=5
UMULLO(r3, u2, u3);
UMULHI(r4, u2, u3);
UADDO1(r3, r3);
UADDC1(r4, r4);
UADD1(t1, 0x0ULL);
UADDO1(r3, r1);
UADDC1(r4, t2);
UADD1(t1, 0x0ULL);
s5 = r3;
//k=6
UMULLO(r0, u3, u3);
UMULHI(r1, u3, u3);
UADDO1(r0, r4);
UADD1(r1, t1);
s6 = r0;
//k=7
s7 = r1;
//Reduction
UMULLO(z3, s5, 0x1000003d1);
UMULHI(z4, s5, 0x1000003d1);
UMULLO(z5, s6, 0x1000003d1);
UMULHI(z6, s6, 0x1000003d1);
UMULLO(z7, s7, 0x1000003d1);
UMULHI(z8, s7, 0x1000003d1);
UMULLO(z1, s4, 0x1000003d1ULL);
UMULHI(z2, s4, 0x1000003d1ULL);
UADDO1(z1, s0);
UADD1(z2, 0x0ULL);
UADDO1(z2, s1);
UADDC1(z4, s2);
UADDC1(z6, s3);
UADD1(z8, 0x0ULL);
uint64_t c = 0;
UADDO1(z3, z2);
UADDC1(z5, z4);
UADDC1(z7, z6);
UADD1(z8, 0x0ULL);
UMULLO(u10, z8, 0x1000003d1ULL);
UMULHI(u11, z8, 0x1000003d1ULL);
UADDO1(z1, u10);
UADDC1(z3, u11);
UADD1(c, 0x0ULL);
rp[0] = z1;
rp[1] = z3;
if(c == 1){
UADDC1(z5, 0x1ULL);
UADD1(z7, 0x0ULL);
}
rp[2] = z5;
rp[3] = z7;
}
#define SET0(a) asm volatile ("mov.u64 %0,0;" : "=l"(a))
// ---------------------------------------------------------------------------------------
// Compute a*b*(mod n)
// a and b must be lower than n
// ---------------------------------------------------------------------------------------
__device__ void _ModMult(uint64_t *r, uint64_t *a, uint64_t *b) {
uint64_t r512[8];
uint64_t t[NBBLOCK];
uint64_t ah,al;
SET0(r512[5]);
SET0(r512[6]);
SET0(r512[7]);
// 256*256 multiplier
#define SET0(a) asm volatile ("mov.u64 %0,0;" : "=l"(a))
// ---------------------------------------------------------------------------------------
// Compute a*b*(mod n)
// a and b must be lower than n
// ---------------------------------------------------------------------------------------
__device__ void _ModMult(uint64_t *r, uint64_t *a, uint64_t *b) {
uint64_t r512[8];
uint64_t t[NBBLOCK];
uint64_t ah,al;
SET0(r512[5]);
SET0(r512[6]);
SET0(r512[7]);
// 256*256 multiplier
// Reduce from 512 to 320
- UMult(t,(r512 + 4), 0x1000003D1ULL);
+ UMULLO(t[0],r512[4],0x1000003D1ULL);
+ UMULLO(t[1],r512[5],0x1000003D1ULL);
+ MADDO(t[1], r512[4],0x1000003D1ULL,t[1]);
+ UMULLO(t[2],r512[6],0x1000003D1ULL);
+ MADDC(t[2],r512[5],0x1000003D1ULL, t[2]);
+ UMULLO(t[3],r512[7],0x1000003D1ULL);
+ MADDC(t[3],r512[6],0x1000003D1ULL, t[3]);
+ MADD(t[4],r512[7],0x1000003D1ULL, 0ULL);
// Reduce from 512 to 320
- UMult(t,(r512 + 4), 0x1000003D1ULL);
+ UMULLO(t[0],r512[4],0x1000003D1ULL);
+ UMULLO(t[1],r512[5],0x1000003D1ULL);
+ MADDO(t[1], r512[4],0x1000003D1ULL,t[1]);
+ UMULLO(t[2],r512[6],0x1000003D1ULL);
+ MADDC(t[2],r512[5],0x1000003D1ULL, t[2]);
+ UMULLO(t[3],r512[7],0x1000003D1ULL);
+ MADDC(t[3],r512[6],0x1000003D1ULL, t[3]);
+ MADD(t[4],r512[7],0x1000003D1ULL, 0ULL);
volatile uint64_t r512[8];
volatile uint64_t r512[8];
~/VanitySearch50_cuda8$ ./VanitySearch50_cuda8 -check -g 1
GetBase10() Results OK
Add() Results OK : 333.333 MegaAdd/sec
Mult() Results OK : 29.674 MegaMult/sec
Div() Results OK : 5.556 MegaDiv/sec
ModInv()/ModExp() Results OK
ModInv() Results OK : 341.867 KiloInv/sec
IntGroup.ModInv() Results OK : 7.327 MegaInv/sec
ModMulK1() Results OK : 11.682 MegaMult/sec
ModMulK1order() Results OK : 6.460 MegaMult/sec
ModSqrt() Results OK !
Check Generator :OK
Check Double :OK
Check Add :OK
Check GenKey :OK
Adress : 15t3Nt1zyMETkHbjJTTshxLnqPzQvAtdCe OK!
Adress : 1BoatSLRHtKNngkdXEeobR76b53LETtpyT OK!
Adress : 1JeanLucgidKHxfY5gkqGmoVjo1yaU4EDt OK(comp)!
Adress : 1Test6BNjSJC5qwYXsjwKVLvz7DpfLehy OK!
Adress : 1BitcoinP7vnLpsUHWbzDALyJKnNo16Qms OK(comp)!
Check Calc PubKey (full) 1ViViGLEawN27xRzGrEhhYPQrZiTKvKLo :OK
Check Calc PubKey (even) 1Gp7rQ4GdooysEAEJAS2o4Ktjvf1tZCihp:OK
Check Calc PubKey (odd) 18aPiLmTow7Xgu96msrDYvSSWweCvB9oBA:OK
GPU: GPU #0 Quadro M2200 (8x128 cores) Grid(64x128)
Seed: 596970
123.502 MegaKey/sec
ComputeKeys() found 1594 items , CPU check...
Expected item not found 3412910a c97422a4 6f11601a 8c75dbba a494e3c4 (thread=87, incr=-540, endo=0)
Expected item not found 34124e60 837e83bf aba37043 d981e8a7 3ba919f9 (thread=99, incr=-257, endo=0)
Expected item not found 34124b15 09d084f5 c09be79e b9e74233 a5d04c9a (thread=133, incr=184, endo=2)
Expected item not found fefed61a e1a5ee3e d71f81fa 7ed01482 1df88b0f (thread=149, incr=850, endo=2)
Expected item not found fefeb4ca 86752243 387f97b1 1ec5fc4f ab2e23cd (thread=204, incr=682, endo=1)
Expected item not found 3412af0c e80a5462 96280598 760e3541 3c0c7c79 (thread=207, incr=-470, endo=0)
Expected item not found 34122971 0483c8a0 0f392737 ffd3e8aa 20f36367 (thread=234, incr=-91, endo=2)
Expected item not found 3412b84c 7dd3e53f e5c00f67 d44fac8f 594dc830 (thread=249, incr=-547, endo=1)
Expected item not found 34127635 e84de0de f0b9672f ef7f52eb 853b6579 (thread=278, incr=-153, endo=0)
Expected item not found 3412e146 03eaa33c 3e4e3cfc 32448e75 87ddbc8c (thread=300, incr=-648, endo=0)
Expected item not found fefe49af b082f946 430aa009 d722e7b9 85848f2e (thread=309, incr=576, endo=2)
Expected item not found fefe67ad c0e86d66 4c92c703 e853c833 ee684ddc (thread=350, incr=865, endo=1)
Expected item not found 341293f0 85b21f8d 2c97f992 b66f8417 d5762b62 (thread=357, incr=-283, endo=0)
Expected item not found 34126be8 99868951 6f0abbbc 45b5acb9 7a8b8978 (thread=357, incr=-950, endo=1)
Expected item not found fefe4071 da662ebc 6e1132df 9fc940aa 4c73f6b4 (thread=414, incr=277, endo=1)
Expected item not found 3412be76 2b3f96d1 3c1f70fd 19e54210 8bb78a9a (thread=422, incr=-773, endo=1)
Expected item not found fefe1392 83313cc8 622f7b04 8f1acfcc a6973c04 (thread=441, incr=508, endo=2)
Expected item not found fefe356e dd82a5cc ad8f25d7 7e048d04 6cb9668d (thread=474, incr=-461, endo=1)
Expected item not found 34123606 dbee7d71 ff8fa64a 189afb61 71eede71 (thread=486, incr=-534, endo=0)
Expected item not found fefe7242 ab68602b f635577a 9f44ea15 2c7f99ca (thread=504, incr=439, endo=1)
Expected item not found 341210cd d27ced94 b10cda99 0cb8eef3 25bccc2e (thread=524, incr=-929, endo=2)
Expected item not found 3412b95e a84c3c11 04a60e99 2b662810 ce5bb025 (thread=530, incr=-507, endo=2)
Expected item not found fefec926 3c641602 28123d8a ef66b036 2d6d5298 (thread=564, incr=-581, endo=0)
Expected item not found 34124dfe f8227df3 39cc2aac 5fa89e87 1d48a18b (thread=578, incr=-690, endo=0)
Expected item not found fefea0bd 871357d4 6711cb08 415cb045 13054cd4 (thread=620, incr=-1012, endo=1)
Expected item not found fefe81a3 8ac675ce 43d1af2f 4032ffdd 1b9e2c41 (thread=622, incr=720, endo=1)
Expected item not found fefeee16 10039563 1325c5a1 7e4008e0 dfeb643b (thread=626, incr=-815, endo=2)
Expected item not found fefe3f11 1d5af4c0 02531103 27245668 e16e18bb (thread=631, incr=-224, endo=1)
Expected item not found fefe0722 e8c35df1 59dedc91 75c0b34c 53e207d0 (thread=720, incr=610, endo=1)
Expected item not found 341205e3 8ae3fe31 8bb77fe3 d6770770 4fbb5142 (thread=737, incr=-585, endo=0)
Expected item not found 3412a4dd 15b0f82a 37b8f95b a13d6403 40a179d9 (thread=745, incr=348, endo=1)
Expected item not found 3412e545 6a30b568 10894417 65d1c745 f0b36472 (thread=752, incr=-299, endo=0)
Expected item not found 3412c1b2 fb6e7210 acd4429c 00f57161 f02c555c (thread=780, incr=312, endo=2)
.....
CPU found 1548 items
GPU: point correct [238/238]
GPU: endo #1 correct [213/273]
GPU: endo #2 correct [202/271]
GPU: sym/point correct [108/226]
GPU: sym/endo #1 correct [207/277]
GPU: sym/endo #2 correct [202/263]
GPU/CPU check Failed !
export LD_LIBRARY_PATH=.
./VanitySearch50 ...
export LD_LIBRARY_PATH=.
./VanitySearch50 ...
__device__ void ModSub256(uint64_t *rp, uint64_t *ap, uint64_t *bp) {
uint64_t a0, a1, a2, a3, b0, b1, b2, b3, r0, r1, r2, r3;
int8_t c0, c1, c2, c3;
a0 = ap[0];
a1 = ap[1];
a2 = ap[2];
a3 = ap[3];
b0 = bp[0];
b1 = bp[1];
b2 = bp[2];
b3 = bp[3];
/*
r0 = a0 - b0;
c0 = (a0 < b0) ? 1 : -1;
c0 = (r0 == 0) ? 0 : c0;
r1 = a1 - b1;
c1 = (a1 < b1) ? 1 : -1;
c1 = (r1 == 0) ? c0 : c1;
r1 = r1 - (c0 == 1);
r2 = a2 - b2;
c2 = (a2 < b2) ? 1 : -1;
c2 = (r2 == 0) ? c1 : c2;
r2 = r2 - (c1 == 1);
r3 = a3 - b3;
c3 = (a3 < b3) ? 1 : -1;
c3 = (r3 == 0) ? c2 : c3;
r3 = r3 - (c2 == 1);
*/
c0 = a0 < b0;
r0 = a0 - b0;
c1 = a1 < b1;
r1 = a1 - b1;
if(r1 == 0){ c1 = c0;}
if(c0) {r1 = r1 - 1;}
c2 = a2 < b2;
r2 = a2 - b2;
if(r2 == 0){ c2 = c1;}
if(c1) {r2 = r2 - 1;}
c3 = a3 < b3;
r3 = a3 - b3;
if(r3 == 0){ c3 = c2;}
if(c2) {r3 = r3 - 1;}
if(c3 == 1){
if(r0 > 0x1000003d0){ //almost always --> no borrow
r0 = r0 - 0x1000003d1;
}
else{
//c[0] = (r0 < 0x1000003d1) ? 1 : -1;
//c0 = (r0 == 0x1000003d1) ? 0 : 1;
//c0 = 1; // for sure r0 < 0x1000003d1
r0 = r0 - 0x1000003d1;
r1 = r1 - 1; //c0 is 1
c1 = (r1 == 0xffffffffffffffff) ? 1 : -1;
c2 = (r2 == 0) ? c1 : -1;
if(c1 == 1) r2 = r2 - 1;
if(c2 == 1) r3 = r3 - 1;
};
};
rp[0] = r0;
rp[1] = r1;
rp[2] = r2;
rp[3] = r3;
return;
}
ModNeg256(dy,Gy[i]); <--
ModSub256(dy, py);
ModSub256(dy, pyn, Gy[i]);
ModNeg256(pyn,py);
ModAdd256(py, Gy[i]);
ModSub256(py, sy);
ModSub256(dy, pyn, Gy[i]);
_ModMult(_s, dy, dx[i]); // s = (p2.y-p1.y)*inverse(p2.x-p1.x)
_ModMult(_p2, _s, _s); // _p = pow2(s)
ModSub256(px, _p2, px);
ModSub256(px, Gx[i]); // px = pow2(s) - p1.x - p2.x;
ModSub256(py, sx, px);
_ModMult(py, _s); // py = - s*(ret.x-p2.x)
ModSub256(py, sy); // py = - p2.y - s*(ret.x-p2.x);
if ((int64_t)t < 0) {
UADDO1(r[0], _P[0]);
UADDC1(r[1], _P[1]);
UADDC1(r[2], _P[2]);
UADD1(r[3], _P[3]);
}
if ((int64_t)t < 0) {
USUBO1(r[0], 0x01000003d1);
USUBC1(r[1], 0ULL);
USUBC1(r[2], 0ULL);
USUBC1(r[3], 0ULL);
}