Well, this algorithm is being mined by ASICs for a while now, so I don't think it's worth working on a GPU implementation.
And, probably FPGAs are on it as well.
Thanks for anwser,
I have an rig with vega 64, its not profitable?
Regards
It was the Bitcointalk forum that inspired us to create Bitcointalksearch.org - Bitcointalk is an excellent site that should be the default page for anybody dealing in cryptocurrency, since it is a virtual gold-mine of data. However, our experience and user feedback led us create our site; Bitcointalk's search is slow, and difficult to get the results you need, because you need to log in first to find anything useful - furthermore, there are rate limiters for their search functionality.
The aim of our project is to create a faster website that yields more results and faster without having to create an account and eliminate the need to log in - your personal data, therefore, will never be in jeopardy since we are not asking for any of your data and you don't need to provide them to use our site with all of its capabilities.
We created this website with the sole purpose of users being able to search quickly and efficiently in the field of cryptocurrency so they will have access to the latest and most accurate information and thereby assisting the crypto-community at large.
/*
* Myriadcoin Groestl kernel implementation (Groestl-512 + SHA-256)
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2007-2010 Thomas Pornin
* Copyright (c) 2014 phm
* Copyright (c) 2014-2015 John Doering
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*/
#ifndef MYRIADCOIN_GROESTL_CL
#define MYRIADCOIN_GROESTL_CL
#if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1
#else
#define SPH_BIG_ENDIAN 1
#endif
#define C32(a) ((uint)(a ## U))
#define T32(a) (as_uint(a))
#define ROTL32(a, b) rotate(as_uint(a), as_uint(b))
#define ROTR32(a, b) ROTL32(a, (32 - (b)))
#define C64(a) ((ulong)(a ## UL))
#define T64(a) (as_ulong(a))
#undef USE_LE
#if SPH_GROESTL_LITTLE_ENDIAN
#define USE_LE 1
#elif SPH_GROESTL_BIG_ENDIAN
#define USE_LE 0
#elif SPH_LITTLE_ENDIAN
#define USE_LE 1
#endif
#if USE_LE
#define C64e(x) ((C64(x) >> 56) \
| ((C64(x) >> 40) & C64(0x000000000000FF00)) \
| ((C64(x) >> 24) & C64(0x0000000000FF0000)) \
| ((C64(x) >> 8) & C64(0x00000000FF000000)) \
| ((C64(x) << 8) & C64(0x000000FF00000000)) \
| ((C64(x) << 24) & C64(0x0000FF0000000000)) \
| ((C64(x) << 40) & C64(0x00FF000000000000)) \
| ((C64(x) << 56) & C64(0xFF00000000000000)))
#define B64_0(x) ((x) & 0xFF)
#define B64_1(x) (((x) >> 8) & 0xFF)
#define B64_2(x) (((x) >> 16) & 0xFF)
#define B64_3(x) (((x) >> 24) & 0xFF)
#define B64_4(x) (((x) >> 32) & 0xFF)
#define B64_5(x) (((x) >> 40) & 0xFF)
#define B64_6(x) (((x) >> 48) & 0xFF)
#define B64_7(x) ((x) >> 56)
#define PC64(j, r) ((ulong)((j) + (r)))
#define QC64(j, r) (((ulong)(r) << 56) ^ T64(~((ulong)(j) << 56)))
#define H15 (((ulong)(512 & 0xFF) << 56) | ((ulong)(512 & 0xFF00) << 40))
#else
#define C64e(x) C64(x)
#define B64_0(x) ((x) >> 56)
#define B64_1(x) (((x) >> 48) & 0xFF)
#define B64_2(x) (((x) >> 40) & 0xFF)
#define B64_3(x) (((x) >> 32) & 0xFF)
#define B64_4(x) (((x) >> 24) & 0xFF)
#define B64_5(x) (((x) >> 16) & 0xFF)
#define B64_6(x) (((x) >> 8) & 0xFF)
#define B64_7(x) ((x) & 0xFF)
#define PC64(j, r) ((ulong)((j) + (r)) << 56)
#define QC64(j, r) ((ulong)(r) ^ T64(~(ulong)(j)))
#define H15 (ulong)512
#endif
#define M15 0x100000000000000
__constant ulong T0[] = {
C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8),
C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6),
C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6),
C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491),
C64e(0x6090f050f0c05060), C64e(0x0207050305040302),
C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56),
C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5),
C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec),
C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f),
C64e(0x8949c040c0094089), C64e(0xfa68928792ef87fa),
C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2),
C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb),
C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3),
C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45),
C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753),
C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b),
C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1),
C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c),
C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e),
C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83),
C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451),
C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9),
C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab),
C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a),
C64e(0x081c140c14100c08), C64e(0x9563f652f6315295),
C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d),
C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137),
C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f),
C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624),
C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf),
C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e),
C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea),
C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d),
C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34),
C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc),
C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b),
C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76),
C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d),
C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd),
C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713),
C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9),
C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1),
C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3),
C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6),
C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d),
C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72),
C64e(0x94ed79de7933de94), C64e(0x98ff67d4672bd498),
C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85),
C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5),
C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed),
C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a),
C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411),
C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9),
C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe),
C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478),
C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b),
C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d),
C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05),
C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21),
C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1),
C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177),
C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342),
C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5),
C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf),
C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418),
C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3),
C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235),
C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e),
C64e(0x936af957f93d5793), C64e(0x55580df20daaf255),
C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a),
C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba),
C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6),
C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819),
C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3),
C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54),
C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b),
C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7),
C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28),
C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc),
C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad),
C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664),
C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14),
C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c),
C64e(0x48fcb46cb4906c48), C64e(0xb88f37e4376be4b8),
C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd),
C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4),
C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431),
C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2),
C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b),
C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da),
C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1),
C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049),
C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac),
C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf),
C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4),
C64e(0x476720e9208ee947), C64e(0x1038281828201810),
C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0),
C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c),
C64e(0x38546c246c702438), C64e(0x575f08f108aef157),
C64e(0x732152c752e6c773), C64e(0x9764f351f3355197),
C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1),
C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e),
C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61),
C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f),
C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c),
C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc),
C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506),
C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c),
C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a),
C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069),
C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899),
C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927),
C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb),
C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322),
C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9),
C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733),
C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c),
C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9),
C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa),
C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5),
C64e(0x03898a8f8a068f03), C64e(0x594a13f813b2f859),
C64e(0x09929b809b128009), C64e(0x1a2339173934171a),
C64e(0x651075da75cada65), C64e(0xd784533153b531d7),
C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0),
C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029),
C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e),
C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8),
C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c)
};
#define RBTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \
t[d] = T0[B64_0(a[b0])] \
^ T1[B64_1(a[b1])] \
^ T2[B64_2(a[b2])] \
^ T3[B64_3(a[b3])] \
^ T4[B64_4(a[b4])] \
^ T5[B64_5(a[b5])] \
^ T6[B64_6(a[b6])] \
^ T7[B64_7(a[b7])]; \
} while (0)
#define ROUND_BIG_P(a, r) do { \
a[0] ^= PC64(0x00, r); \
a[1] ^= PC64(0x10, r); \
a[2] ^= PC64(0x20, r); \
a[3] ^= PC64(0x30, r); \
a[4] ^= PC64(0x40, r); \
a[5] ^= PC64(0x50, r); \
a[6] ^= PC64(0x60, r); \
a[7] ^= PC64(0x70, r); \
a[8] ^= PC64(0x80, r); \
a[9] ^= PC64(0x90, r); \
a[10] ^= PC64(0xA0, r); \
a[11] ^= PC64(0xB0, r); \
a[12] ^= PC64(0xC0, r); \
a[13] ^= PC64(0xD0, r); \
a[14] ^= PC64(0xE0, r); \
a[15] ^= PC64(0xF0, r); \
RBTT( 0, a, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xB); \
RBTT( 1, a, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xC); \
RBTT( 2, a, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0xD); \
RBTT( 3, a, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xE); \
RBTT( 4, a, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xF); \
RBTT( 5, a, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0x0); \
RBTT( 6, a, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x1); \
RBTT( 7, a, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0x2); \
RBTT( 8, a, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0x3); \
RBTT( 9, a, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x4); \
RBTT(10, a, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x5); \
RBTT(11, a, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x6); \
RBTT(12, a, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x7); \
RBTT(13, a, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x8); \
RBTT(14, a, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x9); \
RBTT(15, a, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xA); \
a[0] = t[0]; \
a[1] = t[1]; \
a[2] = t[2]; \
a[3] = t[3]; \
a[4] = t[4]; \
a[5] = t[5]; \
a[6] = t[6]; \
a[7] = t[7]; \
a[8] = t[8]; \
a[9] = t[9]; \
a[10] = t[10]; \
a[11] = t[11]; \
a[12] = t[12]; \
a[13] = t[13]; \
a[14] = t[14]; \
a[15] = t[15]; \
} while (0)
#define ROUND_BIG_Q(a, r) do { \
a[0] ^= QC64(0x00, r); \
a[1] ^= QC64(0x10, r); \
a[2] ^= QC64(0x20, r); \
a[3] ^= QC64(0x30, r); \
a[4] ^= QC64(0x40, r); \
a[5] ^= QC64(0x50, r); \
a[6] ^= QC64(0x60, r); \
a[7] ^= QC64(0x70, r); \
a[8] ^= QC64(0x80, r); \
a[9] ^= QC64(0x90, r); \
a[10] ^= QC64(0xA0, r); \
a[11] ^= QC64(0xB0, r); \
a[12] ^= QC64(0xC0, r); \
a[13] ^= QC64(0xD0, r); \
a[14] ^= QC64(0xE0, r); \
a[15] ^= QC64(0xF0, r); \
RBTT(0x0, a, 0x1, 0x3, 0x5, 0xB, 0x0, 0x2, 0x4, 0x6); \
RBTT(0x1, a, 0x2, 0x4, 0x6, 0xC, 0x1, 0x3, 0x5, 0x7); \
RBTT(0x2, a, 0x3, 0x5, 0x7, 0xD, 0x2, 0x4, 0x6, 0x8); \
RBTT(0x3, a, 0x4, 0x6, 0x8, 0xE, 0x3, 0x5, 0x7, 0x9); \
RBTT(0x4, a, 0x5, 0x7, 0x9, 0xF, 0x4, 0x6, 0x8, 0xA); \
RBTT(0x5, a, 0x6, 0x8, 0xA, 0x0, 0x5, 0x7, 0x9, 0xB); \
RBTT(0x6, a, 0x7, 0x9, 0xB, 0x1, 0x6, 0x8, 0xA, 0xC); \
RBTT(0x7, a, 0x8, 0xA, 0xC, 0x2, 0x7, 0x9, 0xB, 0xD); \
RBTT(0x8, a, 0x9, 0xB, 0xD, 0x3, 0x8, 0xA, 0xC, 0xE); \
RBTT(0x9, a, 0xA, 0xC, 0xE, 0x4, 0x9, 0xB, 0xD, 0xF); \
RBTT(0xA, a, 0xB, 0xD, 0xF, 0x5, 0xA, 0xC, 0xE, 0x0); \
RBTT(0xB, a, 0xC, 0xE, 0x0, 0x6, 0xB, 0xD, 0xF, 0x1); \
RBTT(0xC, a, 0xD, 0xF, 0x1, 0x7, 0xC, 0xE, 0x0, 0x2); \
RBTT(0xD, a, 0xE, 0x0, 0x2, 0x8, 0xD, 0xF, 0x1, 0x3); \
RBTT(0xE, a, 0xF, 0x1, 0x3, 0x9, 0xE, 0x0, 0x2, 0x4); \
RBTT(0xF, a, 0x0, 0x2, 0x4, 0xA, 0xF, 0x1, 0x3, 0x5); \
a[0] = t[0]; \
a[1] = t[1]; \
a[2] = t[2]; \
a[3] = t[3]; \
a[4] = t[4]; \
a[5] = t[5]; \
a[6] = t[6]; \
a[7] = t[7]; \
a[8] = t[8]; \
a[9] = t[9]; \
a[10] = t[10]; \
a[11] = t[11]; \
a[12] = t[12]; \
a[13] = t[13]; \
a[14] = t[14]; \
a[15] = t[15]; \
} while (0)
#define SWAP4(x) as_uint(as_uchar4(x).wzyx)
#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
#if SPH_BIG_ENDIAN
#define ENC64E(x) SWAP8(x)
#define DEC64E(x) SWAP8(*(const __global ulong *) (x));
#else
#define ENC64E(x) (x)
#define DEC64E(x) (*(const __global ulong *) (x));
#endif
#define SHR(x, n) ((x) >> n)
#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))
#define S0(x) (ROTL32(x, 25) ^ ROTL32(x, 14) ^ SHR(x, 3))
#define S1(x) (ROTL32(x, 15) ^ ROTL32(x, 13) ^ SHR(x, 10))
#define S2(x) (ROTL32(x, 30) ^ ROTL32(x, 19) ^ ROTL32(x, 10))
#define S3(x) (ROTL32(x, 26) ^ ROTL32(x, 21) ^ ROTL32(x, 7))
#define P(a, b, c, d, e, f, g, h, x, K) { \
temp = h + S3(e) + F1(e, f, g) + (K + x); \
d += temp; h = temp + S2(a) + F0(a, b, c); \
}
#define PLAST(a, b, c, d, e, f, g, h, x, K) { \
d += h + S3(e) + F1(e, f, g) + (x + K); \
}
#define F0(y, x, z) bitselect(z, y, z ^ x)
#define F1(x, y, z) bitselect(z, y, x)
#define R0 (W0 = S1(W14) + W9 + S0(W1) + W0)
#define R1 (W1 = S1(W15) + W10 + S0(W2) + W1)
#define R2 (W2 = S1(W0) + W11 + S0(W3) + W2)
#define R3 (W3 = S1(W1) + W12 + S0(W4) + W3)
#define R4 (W4 = S1(W2) + W13 + S0(W5) + W4)
#define R5 (W5 = S1(W3) + W14 + S0(W6) + W5)
#define R6 (W6 = S1(W4) + W15 + S0(W7) + W6)
#define R7 (W7 = S1(W5) + W0 + S0(W8) + W7)
#define R8 (W8 = S1(W6) + W1 + S0(W9) + W8)
#define R9 (W9 = S1(W7) + W2 + S0(W10) + W9)
#define R10 (W10 = S1(W8) + W3 + S0(W11) + W10)
#define R11 (W11 = S1(W9) + W4 + S0(W12) + W11)
#define R12 (W12 = S1(W10) + W5 + S0(W13) + W12)
#define R13 (W13 = S1(W11) + W6 + S0(W14) + W13)
#define R14 (W14 = S1(W12) + W7 + S0(W15) + W14)
#define R15 (W15 = S1(W13) + W8 + S0(W0) + W15)
#define RD14 (S1(W12) + W7 + S0(W15) + W14)
#define RD15 (S1(W13) + W8 + S0(W0) + W15)
__kernel __attribute__((vec_type_hint(uint4)))
__kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global unsigned char* block, volatile __global uint* output,
const ulong target) {
uint glbid = get_global_id(0);
uint lclid = get_local_id(0);
ulong r;
uint i;
/* Groestl-512 */
__private ulong16 GMT[3];
ulong *g = (ulong *) &GMT[0];
ulong *m = (ulong *) &GMT[1];
ulong *t = (ulong *) &GMT[2];
__local ulong T0_L[256], T1_L[256], T2_L[256], T3_L[256],
T4_L[256], T5_L[256], T6_L[256], T7_L[256];
/* Compute the tables */
#if (WORKSIZE == 64)
T0_L[lclid] = T0[lclid];
T0_L[lclid + 64] = T0[lclid + 64];
T0_L[lclid + 128] = T0[lclid + 128];
T0_L[lclid + 192] = T0[lclid + 192];
T1_L[lclid] = rotate(T0[lclid], 8UL);
T1_L[lclid + 64] = rotate(T0[lclid + 64], 8UL);
T1_L[lclid + 128] = rotate(T0[lclid + 128], 8UL);
T1_L[lclid + 192] = rotate(T0[lclid + 192], 8UL);
T2_L[lclid] = rotate(T0[lclid], 16UL);
T2_L[lclid + 64] = rotate(T0[lclid + 64], 16UL);
T2_L[lclid + 128] = rotate(T0[lclid + 128], 16UL);
T2_L[lclid + 192] = rotate(T0[lclid + 192], 16UL);
T3_L[lclid] = rotate(T0[lclid], 24UL);
T3_L[lclid + 64] = rotate(T0[lclid + 64], 24UL);
T3_L[lclid + 128] = rotate(T0[lclid + 128], 24UL);
T3_L[lclid + 192] = rotate(T0[lclid + 192], 24UL);
T4_L[lclid] = rotate(T0[lclid], 32UL);
T4_L[lclid + 64] = rotate(T0[lclid + 64], 32UL);
T4_L[lclid + 128] = rotate(T0[lclid + 128], 32UL);
T4_L[lclid + 192] = rotate(T0[lclid + 192], 32UL);
T5_L[lclid] = rotate(T0[lclid], 40UL);
T5_L[lclid + 64] = rotate(T0[lclid + 64], 40UL);
T5_L[lclid + 128] = rotate(T0[lclid + 128], 40UL);
T5_L[lclid + 192] = rotate(T0[lclid + 192], 40UL);
T6_L[lclid] = rotate(T0[lclid], 48UL);
T6_L[lclid + 64] = rotate(T0[lclid + 64], 48UL);
T6_L[lclid + 128] = rotate(T0[lclid + 128], 48UL);
T6_L[lclid + 192] = rotate(T0[lclid + 192], 48UL);
T7_L[lclid] = rotate(T0[lclid], 56UL);
T7_L[lclid + 64] = rotate(T0[lclid + 64], 56UL);
T7_L[lclid + 128] = rotate(T0[lclid + 128], 56UL);
T7_L[lclid + 192] = rotate(T0[lclid + 192], 56UL);
#elif (WORKSIZE == 128)
T0_L[lclid] = T0[lclid];
T0_L[lclid + 128] = T0[lclid + 128];
T1_L[lclid] = rotate(T0[lclid], 8UL);
T1_L[lclid + 128] = rotate(T0[lclid + 128], 8UL);
T2_L[lclid] = rotate(T0[lclid], 16UL);
T2_L[lclid + 128] = rotate(T0[lclid + 128], 16UL);
T3_L[lclid] = rotate(T0[lclid], 24UL);
T3_L[lclid + 128] = rotate(T0[lclid + 128], 24UL);
T4_L[lclid] = rotate(T0[lclid], 32UL);
T4_L[lclid + 128] = rotate(T0[lclid + 128], 32UL);
T5_L[lclid] = rotate(T0[lclid], 40UL);
T5_L[lclid + 128] = rotate(T0[lclid + 128], 40UL);
T6_L[lclid] = rotate(T0[lclid], 48UL);
T6_L[lclid + 128] = rotate(T0[lclid + 128], 48UL);
T7_L[lclid] = rotate(T0[lclid], 56UL);
T7_L[lclid + 128] = rotate(T0[lclid + 128], 56UL);
#elif (WORKSIZE == 256)
T0_L[lclid] = T0[lclid];
T1_L[lclid] = rotate(T0[lclid], 8UL);
T2_L[lclid] = rotate(T0[lclid], 16UL);
T3_L[lclid] = rotate(T0[lclid], 24UL);
T4_L[lclid] = rotate(T0[lclid], 32UL);
T5_L[lclid] = rotate(T0[lclid], 40UL);
T6_L[lclid] = rotate(T0[lclid], 48UL);
T7_L[lclid] = rotate(T0[lclid], 56UL);
#else
return;
#endif
#define T0 T0_L
#define T1 T1_L
#define T2 T2_L
#define T3 T3_L
#define T4 T4_L
#define T5 T5_L
#define T6 T6_L
#define T7 T7_L
m[0] = DEC64E(block);
m[1] = DEC64E(block + 8);
m[2] = DEC64E(block + 16);
m[3] = DEC64E(block + 24);
m[4] = DEC64E(block + 32);
m[5] = DEC64E(block + 40);
m[6] = DEC64E(block + 48);
m[7] = DEC64E(block + 56);
m[8] = DEC64E(block + 64);
m[9] = DEC64E(block + 72);
m[9] &= 0x00000000FFFFFFFF;
m[9] |= ((ulong) glbid << 32);
m[10] = 0x80;
m[11] = 0;
m[12] = 0;
m[13] = 0;
m[14] = 0;
m[15] = M15;
g[0] = m[0];
g[1] = m[1];
g[2] = m[2];
g[3] = m[3];
g[4] = m[4];
g[5] = m[5];
g[6] = m[6];
g[7] = m[7];
g[8] = m[8];
g[9] = m[9];
g[10] = m[10];
g[11] = m[11];
g[12] = m[12];
g[13] = m[13];
g[14] = m[14];
g[15] = M15 ^ H15;
/* PERM_BIG_Q(m); */
for(r = 0; r < 14; r++)
ROUND_BIG_Q(m, r);
/* PERM_BIG_P(g); */
for(r = 0; r < 14; r++)
ROUND_BIG_P(g, r);
g[0] ^= m[0];
g[1] ^= m[1];
g[2] ^= m[2];
g[3] ^= m[3];
g[4] ^= m[4];
g[5] ^= m[5];
g[6] ^= m[6];
g[7] ^= m[7];
g[8] ^= m[8];
g[9] ^= m[9];
g[10] ^= m[10];
g[11] ^= m[11];
g[12] ^= m[12];
g[13] ^= m[13];
g[14] ^= m[14];
g[15] ^= m[15] ^ H15;
m[0] = g[0];
m[1] = g[1];
m[2] = g[2];
m[3] = g[3];
m[4] = g[4];
m[5] = g[5];
m[6] = g[6];
m[7] = g[7];
m[8] = g[8];
m[9] = g[9];
m[10] = g[10];
m[11] = g[11];
m[12] = g[12];
m[13] = g[13];
m[14] = g[14];
m[15] = g[15];
/* PERM_BIG_P(g); */
for(r = 0; r < 14; r++)
ROUND_BIG_P(g, r);
m[8] = m[8] ^ g[8];
m[9] = m[9] ^ g[9];
m[10] = m[10] ^ g[10];
m[11] = m[11] ^ g[11];
m[12] = m[12] ^ g[12];
m[13] = m[13] ^ g[13];
m[14] = m[14] ^ g[14];
m[15] = m[15] ^ g[15];
/* SHA-256 */
__private uint16 hash[1];
uint *hash_uint = (uint *) hash;
ulong *hash_ulong = (ulong *) hash;
uint temp;
hash_ulong[0] = ENC64E(m[8]);
hash_ulong[1] = ENC64E(m[9]);
hash_ulong[2] = ENC64E(m[10]);
hash_ulong[3] = ENC64E(m[11]);
hash_ulong[4] = ENC64E(m[12]);
hash_ulong[5] = ENC64E(m[13]);
hash_ulong[6] = ENC64E(m[14]);
hash_ulong[7] = ENC64E(m[15]);
uint W0 = SWAP32(hash_uint[0]);
uint W1 = SWAP32(hash_uint[1]);
uint W2 = SWAP32(hash_uint[2]);
uint W3 = SWAP32(hash_uint[3]);
uint W4 = SWAP32(hash_uint[4]);
uint W5 = SWAP32(hash_uint[5]);
uint W6 = SWAP32(hash_uint[6]);
uint W7 = SWAP32(hash_uint[7]);
uint W8 = SWAP32(hash_uint[8]);
uint W9 = SWAP32(hash_uint[9]);
uint W10 = SWAP32(hash_uint[10]);
uint W11 = SWAP32(hash_uint[11]);
uint W12 = SWAP32(hash_uint[12]);
uint W13 = SWAP32(hash_uint[13]);
uint W14 = SWAP32(hash_uint[14]);
uint W15 = SWAP32(hash_uint[15]);
uint v0 = 0x6A09E667;
uint v1 = 0xBB67AE85;
uint v2 = 0x3C6EF372;
uint v3 = 0xA54FF53A;
uint v4 = 0x510E527F;
uint v5 = 0x9B05688C;
uint v6 = 0x1F83D9AB;
uint v7 = 0x5BE0CD19;
P(v0, v1, v2, v3, v4, v5, v6, v7, W0, 0x428A2F98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W1, 0x71374491);
P(v6, v7, v0, v1, v2, v3, v4, v5, W2, 0xB5C0FBCF);
P(v5, v6, v7, v0, v1, v2, v3, v4, W3, 0xE9B5DBA5);
P(v4, v5, v6, v7, v0, v1, v2, v3, W4, 0x3956C25B);
P(v3, v4, v5, v6, v7, v0, v1, v2, W5, 0x59F111F1);
P(v2, v3, v4, v5, v6, v7, v0, v1, W6, 0x923F82A4);
P(v1, v2, v3, v4, v5, v6, v7, v0, W7, 0xAB1C5ED5);
P(v0, v1, v2, v3, v4, v5, v6, v7, W8, 0xD807AA98);
P(v7, v0, v1, v2, v3, v4, v5, v6, W9, 0x12835B01);
P(v6, v7, v0, v1, v2, v3, v4, v5, W10, 0x243185BE);
P(v5, v6, v7, v0, v1, v2, v3, v4, W11, 0x550C7DC3);
P(v4, v5, v6, v7, v0, v1, v2, v3, W12, 0x72BE5D74);
P(v3, v4, v5, v6, v7, v0, v1, v2, W13, 0x80DEB1FE);
P(v2, v3, v4, v5, v6, v7, v0, v1, W14, 0x9BDC06A7);
P(v1, v2, v3, v4, v5, v6, v7, v0, W15, 0xC19BF174);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0xE49B69C1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0xEFBE4786);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x0FC19DC6);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x240CA1CC);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x2DE92C6F);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4A7484AA);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5CB0A9DC);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x76F988DA);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x983E5152);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA831C66D);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xB00327C8);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xBF597FC7);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xC6E00BF3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD5A79147);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0x06CA6351);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x14292967);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x27B70A85);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x2E1B2138);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x4D2C6DFC);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x53380D13);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x650A7354);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x766A0ABB);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x81C2C92E);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x92722C85);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0xA2BFE8A1);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0xA81A664B);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0xC24B8B70);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0xC76C51A3);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0xD192E819);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xD6990624);
P(v2, v3, v4, v5, v6, v7, v0, v1, R14, 0xF40E3585);
P(v1, v2, v3, v4, v5, v6, v7, v0, R15, 0x106AA070);
P(v0, v1, v2, v3, v4, v5, v6, v7, R0, 0x19A4C116);
P(v7, v0, v1, v2, v3, v4, v5, v6, R1, 0x1E376C08);
P(v6, v7, v0, v1, v2, v3, v4, v5, R2, 0x2748774C);
P(v5, v6, v7, v0, v1, v2, v3, v4, R3, 0x34B0BCB5);
P(v4, v5, v6, v7, v0, v1, v2, v3, R4, 0x391C0CB3);
P(v3, v4, v5, v6, v7, v0, v1, v2, R5, 0x4ED8AA4A);
P(v2, v3, v4, v5, v6, v7, v0, v1, R6, 0x5B9CCA4F);
P(v1, v2, v3, v4, v5, v6, v7, v0, R7, 0x682E6FF3);
P(v0, v1, v2, v3, v4, v5, v6, v7, R8, 0x748F82EE);
P(v7, v0, v1, v2, v3, v4, v5, v6, R9, 0x78A5636F);
P(v6, v7, v0, v1, v2, v3, v4, v5, R10, 0x84C87814);
P(v5, v6, v7, v0, v1, v2, v3, v4, R11, 0x8CC70208);
P(v4, v5, v6, v7, v0, v1, v2, v3, R12, 0x90BEFFFA);
P(v3, v4, v5, v6, v7, v0, v1, v2, R13, 0xA4506CEB);
P(v2, v3, v4, v5, v6, v7, v0, v1, RD14, 0xBEF9A3F7);
P(v1, v2, v3, v4, v5, v6, v7, v0, RD15, 0xC67178F2);
v0 += 0x6A09E667;
v1 += 0xBB67AE85;
v2 += 0x3C6EF372;
v3 += 0xA54FF53A;
v4 += 0x510E527F;
v5 += 0x9B05688C;
v6 += 0x1F83D9AB;
uint s6 = v6;
v7 += 0x5BE0CD19;
uint s7 = v7;
P(v0, v1, v2, v3, v4, v5, v6, v7, 0x80000000, 0x428A2F98);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0, 0x71374491);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0, 0xB5C0FBCF);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0, 0xE9B5DBA5);
P(v4, v5, v6, v7, v0, v1, v2, v3, 0, 0x3956C25B);
P(v3, v4, v5, v6, v7, v0, v1, v2, 0, 0x59F111F1);
P(v2, v3, v4, v5, v6, v7, v0, v1, 0, 0x923F82A4);
P(v1, v2, v3, v4, v5, v6, v7, v0, 0, 0xAB1C5ED5);
P(v0, v1, v2, v3, v4, v5, v6, v7, 0, 0xD807AA98);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0, 0x12835B01);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0, 0x243185BE);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0, 0x550C7DC3);
P(v4, v5, v6, v7, v0, v1, v2, v3, 0, 0x72BE5D74);
P(v3, v4, v5, v6, v7, v0, v1, v2, 0, 0x80DEB1FE);
P(v2, v3, v4, v5, v6, v7, v0, v1, 0, 0x9BDC06A7);
P(v1, v2, v3, v4, v5, v6, v7, v0, 512, 0xC19BF174);
P(v0, v1, v2, v3, v4, v5, v6, v7, 0x80000000U, 0xE49B69C1U);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0x01400000U, 0xEFBE4786U);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0x00205000U, 0x0FC19DC6U);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0x00005088U, 0x240CA1CCU);
P(v4, v5, v6, v7, v0, v1, v2, v3, 0x22000800U, 0x2DE92C6FU);
P(v3, v4, v5, v6, v7, v0, v1, v2, 0x22550014U, 0x4A7484AAU);
P(v2, v3, v4, v5, v6, v7, v0, v1, 0x05089742U, 0x5CB0A9DCU);
P(v1, v2, v3, v4, v5, v6, v7, v0, 0xa0000020U, 0x76F988DAU);
P(v0, v1, v2, v3, v4, v5, v6, v7, 0x5a880000U, 0x983E5152U);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0x005c9400U, 0xA831C66DU);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0x0016d49dU, 0xB00327C8U);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0xfa801f00U, 0xBF597FC7U);
P(v4, v5, v6, v7, v0, v1, v2, v3, 0xd33225d0U, 0xC6E00BF3U);
P(v3, v4, v5, v6, v7, v0, v1, v2, 0x11675959U, 0xD5A79147U);
P(v2, v3, v4, v5, v6, v7, v0, v1, 0xf6e6bfdaU, 0x06CA6351U);
P(v1, v2, v3, v4, v5, v6, v7, v0, 0xb30c1549U, 0x14292967U);
P(v0, v1, v2, v3, v4, v5, v6, v7, 0x08b2b050U, 0x27B70A85U);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0x9d7c4c27U, 0x2E1B2138U);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0x0ce2a393U, 0x4D2C6DFCU);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0x88e6e1eaU, 0x53380D13U);
P(v4, v5, v6, v7, v0, v1, v2, v3, 0xa52b4335U, 0x650A7354U);
P(v3, v4, v5, v6, v7, v0, v1, v2, 0x67a16f49U, 0x766A0ABBU);
P(v2, v3, v4, v5, v6, v7, v0, v1, 0xd732016fU, 0x81C2C92EU);
P(v1, v2, v3, v4, v5, v6, v7, v0, 0x4eeb2e91U, 0x92722C85U);
P(v0, v1, v2, v3, v4, v5, v6, v7, 0x5dbf55e5U, 0xA2BFE8A1U);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0x8eee2335U, 0xA81A664BU);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0xe2bc5ec2U, 0xC24B8B70U);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0xa83f4394U, 0xC76C51A3U);
P(v4, v5, v6, v7, v0, v1, v2, v3, 0x45ad78f7U, 0xD192E819U);
P(v3, v4, v5, v6, v7, v0, v1, v2, 0x36f3d0cdU, 0xD6990624U);
P(v2, v3, v4, v5, v6, v7, v0, v1, 0xd99c05e8U, 0xF40E3585U);
P(v1, v2, v3, v4, v5, v6, v7, v0, 0xb0511dc7U, 0x106AA070U);
P(v0, v1, v2, v3, v4, v5, v6, v7, 0x69bc7ac4U, 0x19A4C116U);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0xbd11375bU, 0x1E376C08U);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0xe3ba71e5U, 0x2748774CU);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0x3b209ff2U, 0x34B0BCB5U);
P(v4, v5, v6, v7, v0, v1, v2, v3, 0x18feee17U, 0x391C0CB3U);
P(v3, v4, v5, v6, v7, v0, v1, v2, 0xe25ad9e7U, 0x4ED8AA4AU);
P(v2, v3, v4, v5, v6, v7, v0, v1, 0x13375046U, 0x5B9CCA4FU);
P(v1, v2, v3, v4, v5, v6, v7, v0, 0x0515089dU, 0x682E6FF3U);
P(v0, v1, v2, v3, v4, v5, v6, v7, 0x4f0d0f04U, 0x748F82EEU);
P(v7, v0, v1, v2, v3, v4, v5, v6, 0x2627484eU, 0x78A5636FU);
P(v6, v7, v0, v1, v2, v3, v4, v5, 0x310128d2U, 0x84C87814U);
P(v5, v6, v7, v0, v1, v2, v3, v4, 0xc668b434U, 0x8CC70208U);
PLAST(v4, v5, v6, v7, v0, v1, v2, v3, 0x420841ccU, 0x90BEFFFAU);
hash_uint[6] = SWAP4(v6 + s6);
hash_uint[7] = SWAP4(v7 + s7);
if(hash_ulong[3] <= target)
output[output[0xFF]++] = SWAP4(glbid);
}
#endif /* MYRIADCOIN_GROESTL_CL */