I think the PTX compiler inlines (unrolls) the function anyway, but in any case I manually inlined the EQUILLESSMOREHALF function into the main loop.
The result is the same as before.
I think the issue is in the CUDA driver API (specifically in its library, cuda.lib),
because random memory access is happening here.
I have already encountered this before, when I wrote a miner for Ethereum: on Windows 7 the miner gave 24 MH/s with 1063 cards, but on Windows 10 only 4 MH/s.
On Windows 7 random memory access worked fine; on Windows 10 it did not. I could not overcome it back then.
// FOUNDINSORT: branch-free binary search for a key in a sorted array of
// 32-byte records held in global memory.
//
// Inputs:
//   a0..a3      key words that are compared, a0 first (most significant),
//               against the first four 32-bit words of each record.
//   a4..a7      accepted for interface compatibility; not read here (only the
//               first half of the key is compared).
//   _arr        base address of the record array. Each record is 32 bytes
//               apart; the ld.global.v4.u32 below needs this address to be
//               16-byte aligned.
//   beginrange  lowest record index to probe.
//   endrange    highest record index to probe.
//               NOTE(review): the clamping below treats endrange as an
//               inclusive bound - confirm against the caller.
// Outputs:
//   eq          1 if the last probe matched the key, otherwise 0.
//   pos         index of the matching record when eq == 1, otherwise 0.
//
// The loop always performs exactly 20 probes (no early exit), so the trip
// count is identical for every thread and bra.uni is legitimate. Once a match
// is found neither bound moves, so the remaining probes hit the same record
// and eq/pos keep their values.
.func (.reg .b32 eq, .reg .b32 pos) FOUNDINSORT(.reg .b32 a0, .reg .b32 a1, .reg .b32 a2, .reg .b32 a3, .reg .b32 a4, .reg .b32 a5, .reg .b32 a6, .reg .b32 a7, .reg .b64 _arr, .reg .b32 beginrange, .reg .b32 endrange)
{
    .reg .pred p, k;
    .reg .b32 temp, iseq, center, counter;
    .reg .b64 _Pointarray;
    .reg .b32 f0, f1, f2;
    .reg .b32 %b<4>;

    mov.u32 counter,20;             // fixed number of probes
    // Zero the results with mov: PTX xor needs three operands (d, a, b), and
    // xor-ing a register with itself would read it before initialisation.
    mov.u32 eq,0;
    mov.u32 pos,0;

$FFFOUNDINSORTmain:
    setp.hi.u32 p,counter,0;
    @!p bra.uni $FFFOUNDINSORTexit;

    // center = beginrange + (endrange - beginrange) / 2
    sub.u32 center,endrange,beginrange;
    shr.b32 center,center,1;
    add.u32 center,center,beginrange;

    // _Pointarray = _arr + center * 32  (address of the probed record)
    mul.wide.u32 _Pointarray,center,32;
    add.u64 _Pointarray,_Pointarray,_arr;

    // Inlined replacement for:
    //   call.uni (iseq), EQUILLESSMOREHALF, (a0, ..., a7, _Pointarray);
    // Result codes: 0 = key equals record, 1 = key is less, 2 = key is more.
    mov.u32 f0,0x00;                // equal
    mov.u32 f1,0x01;                // less
    mov.u32 f2,0x02;                // more
    ld.global.v4.u32 {%b0, %b1, %b2, %b3},[_Pointarray];

    // Lexicographic compare without branches. While every word so far is
    // equal, (f0,f1,f2) stay (0,1,2). At the first differing word all three
    // collapse to the verdict (1 or 2), so later words cannot change it.
    setp.hi.u32 p,a0,%b0;
    selp.u32 f1,f2,f1,p;
    setp.eq.u32 p,a0,%b0;
    selp.u32 f0,f0,f1,p;
    selp.u32 f2,f2,f1,p;

    setp.hi.u32 p,a1,%b1;
    selp.u32 f1,f2,f1,p;
    setp.eq.u32 p,a1,%b1;
    selp.u32 f0,f0,f1,p;
    selp.u32 f2,f2,f1,p;

    setp.hi.u32 p,a2,%b2;
    selp.u32 f1,f2,f1,p;
    setp.eq.u32 p,a2,%b2;
    selp.u32 f0,f0,f1,p;
    selp.u32 f2,f2,f1,p;

    setp.hi.u32 p,a3,%b3;
    selp.u32 f1,f2,f1,p;
    setp.eq.u32 p,a3,%b3;
    selp.u32 iseq,f0,f1,p;
    // end of inlined compare

    // findkey > Pointarray[center]: beginrange = min(center + 1, endrange)
    setp.eq.u32 p,iseq,2;
    add.u32 temp,center,1;
    setp.lo.u32 k,center,endrange;
    selp.u32 temp,temp,endrange,k;
    selp.u32 beginrange,temp,beginrange,p;

    // findkey < Pointarray[center]: endrange = max(center - 1, beginrange)
    setp.eq.u32 p,iseq,1;
    // This instruction had been swallowed by the comment on the line above,
    // leaving temp at center + 1, so endrange could not narrow as intended.
    sub.u32 temp,center,1;
    // center == 0 would wrap center - 1 to 0xFFFFFFFF; fall back to beginrange
    setp.hi.u32 k,center,0;
    selp.u32 temp,temp,beginrange,k;
    // never let the upper bound drop below the lower bound
    setp.hs.u32 k,temp,beginrange;
    selp.u32 temp,temp,beginrange,k;
    selp.u32 endrange,temp,endrange,p;

    // findkey == Pointarray[center]: report the hit and its index
    setp.eq.u32 p,iseq,0;
    selp.u32 eq,1,0,p;
    selp.u32 pos,center,0,p;

    sub.u32 counter,counter,1;
    bra.uni $FFFOUNDINSORTmain;

$FFFOUNDINSORTexit:
    ret.uni;
}
Report to moderator