It was the Bitcointalk forum that inspired us to create Bitcointalksearch.org - Bitcointalk is an excellent site that should be the default page for anybody dealing in cryptocurrency, since it is a virtual gold-mine of data. However, our experience and user feedback led us to create our site: Bitcointalk's search is slow, and it is difficult to get the results you need, because you need to log in first to find anything useful - furthermore, there are rate limiters for their search functionality.
The aim of our project is to create a faster website that yields more results, without requiring you to create an account or log in - your personal data, therefore, will never be in jeopardy, since we do not ask for any of it and you do not need to provide any to use our site with all of its capabilities.
We created this website with the sole purpose of letting users search quickly and efficiently in the field of cryptocurrency, so that they have access to the latest and most accurate information, thereby assisting the crypto-community at large.
#include <stdio.h>
#include <time.h>
/* Forward declaration of the benchmarked routine (defined later in this
 * file): mixes the 16-word state X in place, two rounds per loop pass. */
void salsa(uint *X, uint rounds);
/*
 * Benchmark driver: fills a 16-word state with 0..15, runs salsa() over it
 * for 1024*1024 rounds, and prints the CPU time consumed by that single call.
 * Command-line arguments are accepted but not used.
 */
int main(int argc, char **argv) {
    enum { ROUND_COUNT = 1024 * 1024 };
    uint state[16];
    int idx = 0;

    /* Deterministic input: word k holds the value k. */
    while (idx < 16) {
        state[idx] = idx;
        idx++;
    }

    const clock_t t_begin = clock();
    salsa(state, ROUND_COUNT);
    const clock_t t_finish = clock();

    /* clock() counts ticks; divide by CLOCKS_PER_SEC to get seconds. */
    const double elapsed_sec = ((double) (t_finish - t_begin)) / CLOCKS_PER_SEC;
    printf("cputime %g\n", elapsed_sec);
    return 0;
}
/*
 * 32-bit rotate-left used by the quarter round. If the build already provides
 * ROTL32 it is used unchanged; otherwise a plain shift/or fallback is supplied.
 * The fallback assumes `uint` is exactly 32 bits wide and is only used with
 * the rotate counts 7, 9, 13 and 18, so neither shift is ever by 0 or 32.
 */
#ifdef ROTL32
#define SALSA_ROTL(x, n) ROTL32(x, n)
#else
#define SALSA_ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#endif

/*
 * One Salsa20 quarter round on four state words:
 *   b ^= rotl(a + d,  7);   c ^= rotl(b + a,  9);
 *   d ^= rotl(c + b, 13);   a ^= rotl(d + c, 18);
 * When `v` is non-zero every intermediate value is printed (debug trace).
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define quarter(a, b, c, d, v) \
    do { \
        uint t_; \
        t_ = (a) + (d);          if (v) printf("t: %u\n", t_); \
        t_ = SALSA_ROTL(t_, 7);  if (v) printf("t: %u\n", t_); \
        (b) ^= t_;               if (v) printf("b: %u\n", (b)); \
        t_ = (b) + (a);          if (v) printf("t: %u\n", t_); \
        t_ = SALSA_ROTL(t_, 9);  if (v) printf("t: %u\n", t_); \
        (c) ^= t_;               if (v) printf("c: %u\n", (c)); \
        t_ = (c) + (b);          if (v) printf("t: %u\n", t_); \
        t_ = SALSA_ROTL(t_, 13); if (v) printf("t: %u\n", t_); \
        (d) ^= t_;               if (v) printf("d: %u\n", (d)); \
        t_ = (d) + (c);          if (v) printf("t: %u\n", t_); \
        t_ = SALSA_ROTL(t_, 18); if (v) printf("t: %u\n", t_); \
        (a) ^= t_;               if (v) printf("a: %u\n", (a)); \
    } while (0)

/*
 * Salsa20 core permutation with feed-forward, applied in place.
 *
 * X      - 16-word state; on return each word holds its original value plus
 *          the corresponding word of the permuted state.
 * rounds - number of rounds; consumed two at a time (one pass over the first
 *          group of four quarter rounds, then the second group). It is meant
 *          to be a multiple of 2. An odd value is rounded down to the next
 *          even value; previously an odd value made the unsigned counter wrap
 *          past zero and the loop never terminated.
 *
 * noinline keeps the routine a separate call so the benchmark times it as-is.
 */
void __attribute__ ((noinline)) salsa(uint *X, uint rounds) {
    /* Work on local copies so the compiler can keep the state in registers. */
    uint x0 = X[0],   x1 = X[1],   x2 = X[2],   x3 = X[3];
    uint x4 = X[4],   x5 = X[5],   x6 = X[6],   x7 = X[7];
    uint x8 = X[8],   x9 = X[9],   x10 = X[10], x11 = X[11];
    uint x12 = X[12], x13 = X[13], x14 = X[14], x15 = X[15];

    /* Debug trace switch for quarter(); off by default. */
    const int v = 0;

    for (; rounds >= 2; rounds -= 2) {
        /* First group of four quarter rounds. */
        quarter( x0,  x4,  x8, x12, v);
        quarter( x5,  x9, x13,  x1, v);
        quarter(x10, x14,  x2,  x6, v);
        quarter(x15,  x3,  x7, x11, v);
        /* Second group of four quarter rounds. */
        quarter( x0,  x1,  x2,  x3, v);
        quarter( x5,  x6,  x7,  x4, v);
        quarter(x10, x11,  x8,  x9, v);
        quarter(x15, x12, x13, x14, v);
    }

    /* Feed-forward: add the permuted words back onto the input state. */
    X[0] += x0;   X[1] += x1;   X[2] += x2;   X[3] += x3;
    X[4] += x4;   X[5] += x5;   X[6] += x6;   X[7] += x7;
    X[8] += x8;   X[9] += x9;   X[10] += x10; X[11] += x11;
    X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15;
}
#undef quarter
#undef SALSA_ROTL
cputime 0.187971
cputime 0.231245
cputime 0.187873
cputime 0.011749
cputime 0.011733
cputime 0.025701
-O2 -ftree-vectorize -ftree-slp-vectorize -ftree-loop-vectorize -ffast-math -ftree-vectorizer-verbose=7 -funsafe-loop-optimizations -funsafe-math-optimizations
cputime 0.012641
cputime 0.012635
cputime 0.012638
...
str r2, [sp, #164]
ldr r2, [sp, #12]
vldr d16, [sp, #152]
vldr d17, [sp, #160]
str r2, [sp, #172]
ldr r2, [sp, #64]
vldr d20, [sp, #168]
vldr d21, [sp, #176]
str r2, [sp, #184]
vadd.i32 q8, q8, q9
ldr r2, [sp, #36]
str r2, [sp, #188]
ldr r2, [sp, #68]
str r2, [sp, #192]
ldr r2, [sp, #72]
str r2, [sp, #196]
vldr d18, [sp, #184]
vldr d19, [sp, #192]
str r7, [sp, #212]
ldr r2, [sp]
str r2, [sp, #204]
ldr r2, [sp, #24]
vadd.i32 q9, q9, q10
/*
 * One step of the Salsa20 quarter round, done on four lanes at once: each
 * invocation advances four different quarter rounds of the 4x4 state matrix
 * in parallel. Per 32-bit lane it computes
 *
 *     VC ^= rotate_left(VA + VB, SHIFT)
 *
 * The rotate is built from a left shift followed by a shift-right-and-
 * accumulate of the same sum; the two shifted parts occupy disjoint bits, so
 * the accumulate's addition acts as an OR.
 *
 * SHIFT must be a compile-time constant (the intrinsics take immediates);
 * it is parenthesized so an expression argument cannot mis-associate in
 * `32 - SHIFT`.
 *
 * Relies on two temporaries named `vt` and `vt1` being declared at the point
 * of use (presumably uint32x4_t - confirm against the caller). Expands to
 * plain statements that already end in ';', so do not use it as the unbraced
 * body of an if/else.
 */
#define vquarter(VA, VB, VC, SHIFT) \
    vt = vaddq_u32((VB), (VA)); \
    vt1 = vshlq_n_u32(vt, (SHIFT)); \
    vt1 = vsraq_n_u32(vt1, vt, 32 - (SHIFT)); \
    VC = veorq_u32((VC), vt1);
vld1.32 {d22-d23}, [r5]
vld1.32 {d24-d25}, [r8]
vld1.32 {d18-d19}, [r7]
vld1.32 {d20-d21}, [r6]
cmp r4, #0
beq .L2
.L3:
... matrix permutations
vmov.32 r3, d22[0]
vmov.32 d8[0], r3
vmov.32 r3, d24[1]
vmov.32 d8[1], r3
vmov.32 r3, d19[0]
vmov.32 d9[0], r3
vmov.32 r3, d21[1]
vmov.32 d9[1], r3
vmov.32 r3, d24[0]
vmov.32 d10[0], r3
vmov.32 r3, d18[1]
vmov.32 d10[1], r3
vmov.32 r3, d21[0]
vmov.32 d11[0], r3
vmov.32 r3, d23[1]
...
... // all of the 4x-parallel quarter rounds generated from the NEON intrinsics
vmov q7, q10 @ v4si
vadd.i32 q8, q10, q4
vshl.i32 q9, q8, #7
vsra.u32 q9, q8, #25
veor q9, q9, q5
vadd.i32 q10, q9, q4
vshl.i32 q8, q10, #9
vsra.u32 q8, q10, #23
veor q8, q8, q6
vadd.i32 q11, q8, q9
vshl.i32 q10, q11, #13
vsra.u32 q10, q11, #19
veor q10, q10, q7
vadd.i32 q12, q10, q8
vshl.i32 q11, q12, #18
vsra.u32 q11, q12, #14
veor q11, q11, q4
... then more matrix permutations
vmov.32 r3, d20[1]
vmov q5, q9 @ v4si
vmov.32 d10[0], r3
vmov.32 r3, d21[0]
vmov.32 d10[1], r3
vmov.32 r3, d21[1]
vmov.32 d11[0], r3
vmov.32 r3, d20[0]
vmov q14, q5 @ v4si
vmov.32 d29[1], r3
vmov.32 r3, d17[0]
vmov q6, q8 @ v4si
vmov.32 d12[0], r3
vmov.32 r3, d17[1]
vmov.32 d12[1], r3
vmov.32 r3, d16[0]
cputime 0.059751
cputime 0.05988
cputime 0.059885
-O2 -ftree-vectorize -ftree-slp-vectorize -ftree-loop-vectorize -ffast-math -ftree-vectorizer-verbose=7 -funsafe-loop-optimizations -funsafe-math-optimizations
tmp = json_object_get( val, "coinbasevalue" );
if ( !tmp || !json_is_number( tmp ) )
{
applog( LOG_ERR, "JSON invalid coinbasevalue" );
goto out;
}
cbvalue = (int64_t) ( json_is_integer( tmp ) ? json_integer_value( tmp )
: json_number_value( tmp ) );