OK, which release of gcc are you using for compiling VanitySearch (not the CUDA code) ?
It was the Bitcointalk forum that inspired us to create Bitcointalksearch.org - Bitcointalk is an excellent site that should be the default page for anybody dealing in cryptocurrency, since it is a virtual gold-mine of data. However, our experience and user feedback led us to create our site; Bitcointalk's search is slow, and it is difficult to get the results you need, because you need to log in first to find anything useful - furthermore, there are rate limiters for their search functionality.
The aim of our project is to create a faster website that yields more results, more quickly, without requiring you to create an account or log in - your personal data, therefore, will never be in jeopardy, since we are not asking for any of your data and you don't need to provide any to use our site with all of its capabilities.
We created this website with the sole purpose of enabling users to search quickly and efficiently in the field of cryptocurrency, so they will have access to the latest and most accurate information, thereby assisting the crypto-community at large.
// Reduce from 320 to 256
UADD1(t[4],0ULL);
UMULLO(al,t[4], 0x1000003D1ULL);
UMULHI(ah,t[4], 0x1000003D1ULL);
UADDO(r[0],r512[0], al);
UADDC(r[1],r512[1], ah);
UADDC(r[2],r512[2], 0ULL);
UADD(r[3],r512[3], 0ULL);
UADD1(r[3],0x07ULL); <-- error!!!
CPU found 1539 items
GPU: point correct [0/271]
GPU: endo #1 correct [0/248]
GPU: endo #2 correct [0/260]
GPU: sym/point correct [0/255]
GPU: sym/endo #1 correct [0/265]
GPU: sym/endo #2 correct [0/240]
GPU/CPU check Failed !
~/VanitySearch$ ./VanitySearch -stop -t 7 -gpu 1111
Difficulty: 16777216
Search: 1111 [Compressed]
Start Sat Mar 23 18:39:22 2019
Base Key:12FF1E3D528DC8068438E8ED181E1F2505E877A7543869B0B38E500F5FA284F9
Number of CPU thread: 7
GPU: GPU #0 Quadro M2200 (8x128 cores) Grid(64x128)
Pub Addr: 1111Cf8ucVbgUtANTRGwQsWVpXVZvqFT6
Prv Addr: 5HxepgskWZ53AokCCvk8d1ZZGinupSX4Sm7tNQygZ9zQpkftRQJ
Prv Key : 0x12FF1E3D528DC8068438E8ED181E1F2505E877A7543869B5B38E500F5FA4D5D3
Check : 1DFm6mzxxKqFo9bysKC9x1TxEz5Z9d9uAb
Check : 1111Cf8ucVbgUtANTRGwQsWVpXVZvqFT6 (comp)
for (uint32_t i = 0; i < HSIZE; i++) {
// P = StartPoint + i*G
Load256(px, sx);
Load256(py, sy);
ModSub256(dy, Gy[i], py);
_ModMult(_s, dy, dx[i]); // s = (p2.y-p1.y)*inverse(p2.x-p1.x)
//_ModMult(_p2, _s, _s); // _p = pow2(s)
_ModSqr(_p2, _s);
ModSub256(px, _p2,px);
ModSub256(px, Gx[i]); // px = pow2(s) - p1.x - p2.x;
/*
ModSub256(py, Gx[i], px);
_ModMult(py, _s); // py = - s*(ret.x-p2.x)
ModSub256(py, Gy[i]); // py = - p2.y - s*(ret.x-p2.x);
*/
CHECK_PREFIX(GRP_SIZE / 2 + (i + 1));
// P = StartPoint - i*G, if (x,y) = i*G then (x,-y) = -i*G
Load256(px, sx);
Load256(py, sy);
//ModNeg256(dy,Gy[i]);
//ModSub256(dy, py);
ModSub256(dy, pyn, Gy[i]);
_ModMult(_s, dy, dx[i]); // s = (p2.y-p1.y)*inverse(p2.x-p1.x)
//_ModMult(_p2, _s, _s); // _p = pow2(s)
_ModSqr(_p2, _s);
ModSub256(px, _p2, px);
ModSub256(px, Gx[i]); // px = pow2(s) - p1.x - p2.x;
/*
ModSub256(py, Gx[i], px);
_ModMult(py, _s); // py = - s*(ret.x-p2.x)
ModAdd256(py, Gy[i]); // py = - p2.y - s*(ret.x-p2.x);
//ModSub256(py, sx, px);
//_ModMult(py, _s); // py = - s*(ret.x-p2.x)
//ModSub256(py, sy);
*/
CHECK_PREFIX(GRP_SIZE / 2 - (i + 1));
}
// First point (startP - (GRP_SIZE/2)*G)
Load256(px, sx);
Load256(py, sy);
ModNeg256(dy, Gy[i]);
ModSub256(dy, py);
_ModMult(_s, dy, dx[i]); // s = (p2.y-p1.y)*inverse(p2.x-p1.x)
//_ModMult(_p2, _s, _s); // _p = pow2(s)
_ModSqr(_p2, _s);
ModSub256(px, _p2, px);
ModSub256(px, Gx[i]); // px = pow2(s) - p1.x - p2.x;
/*
ModSub256(py, Gx[i], px);
_ModMult(py, _s); // py = - s*(ret.x-p2.x)
ModAdd256(py, Gy[i]); // py = - p2.y - s*(ret.x-p2.x);
*/
CHECK_PREFIX(0);
i++;
// Next start point (startP + GRP_SIZE*G)
Load256(px, sx);
Load256(py, sy);
ModSub256(dy, _2Gny, py);
_ModMult(_s, dy, dx[i]); // s = (p2.y-p1.y)*inverse(p2.x-p1.x)
//_ModMult(_p2, _s, _s); // _p = pow2(s)
_ModSqr(_p2, _s);
ModSub256(px, _p2, px);
ModSub256(px, _2Gnx); // px = pow2(s) - p1.x - p2.x;
ModSub256(py, _2Gnx, px);
_ModMult(py, _s); // py = - s*(ret.x-p2.x)
//_ModSqr(py, _s);
ModSub256(py, _2Gny); // py = - p2.y - s*(ret.x-p2.x);
Load256(sx, px);
Load256(sy, py);
}
// Update starting point
__syncthreads();
Store256A(startx, sx);