I have a logic test at the end (not displayed here) that always checks the numbers for correctness.
It goes like this:
// Excerpt of the end-of-run sanity check: print the sum of the four values,
// then flag the run if the sum falls outside the accepted window.
Writeln();
// Sum printed with 22 decimal places, followed by a separating space.
Write('Final number: ',b+bb+bbb+bbbb:0:22,' ');
// Upper bound of the accepted window.
if (b+bb+bbb+bbbb) > 4.0000032938759028 then Write('Result [INCORRECT - 4.0000032938759027 expected]');
// Lower bound of the accepted window.
if (b+bb+bbb+bbbb) < 4.0000032938759026 then Write('Result [INCORRECT- 4.0000032938759027 expected]');
...
Anyway, the source for C is:
#include <math.h>
#include <stdio.h>
#include <time.h>
/*
 * Square-root micro-benchmark.
 *
 * Repeatedly takes the square root of four doubles. Whenever a value has
 * decayed to (almost) 1 it is bumped back up by the loop counter plus a
 * fractional constant, so the chain keeps doing real work instead of
 * converging to 1. Prints the final sum, the elapsed time in milliseconds
 * and a score (10000000 / elapsed ms, i.e. 1000 points for a 10000 ms run).
 *
 * Returns 0.
 */
int main()
{
    printf("\n");

    /* Arbitrary seed values for the four square-root chains. */
    const double a = 911798473;
    const double aa = 143314345;
    const double aaa = 531432117;
    const double aaaa = 343211418;

    unsigned int i;
    double score;
    double b;
    double bb;
    double bbb;
    double bbbb;

    b = a;
    bb = aa;
    bbb = aaa;
    bbbb = aaaa;

    double total_time;
    clock_t start, end;

    start = clock();
    /*
     * NOTE(review): this loop counts i = 0..99999999 while the Pascal
     * version counts 1..100000000, so the bump values differ by one --
     * confirm which is intended before comparing final numbers.
     */
    for (i = 0; i < 100000000; i++)
    {
        b = sqrt(b);
        bb = sqrt(bb);
        bbb = sqrt(bbb);
        bbbb = sqrt(bbbb);

        /* Re-inflate any value that has decayed to ~1. */
        if (b <= 1.0000001) { b = b + i + 12.432432432; }
        if (bb <= 1.0000001) { bb = bb + i + 15.4324442; }
        if (bbb <= 1.0000001) { bbb = bbb + i + 19.42884; }
        if (bbbb <= 1.0000001) { bbbb = bbbb + i + 34.481; }
    }
    end = clock();

    /* clock() ticks -> milliseconds. */
    total_time = ((double) (end - start)) / CLOCKS_PER_SEC * 1000;
    score = (10000000 / total_time);

    /*
     * Consume the results: without this the four values are dead after the
     * loop and an optimizing compiler is free to drop the timed work.
     * It also exposes the final number so runs can be checked.
     */
    printf("\nFinal number: %.22f", b + bb + bbb + bbbb);

    printf("\nTime elapsed: %0.0f msecs", total_time);
    printf("\nScore: %0.0f\n", score);
    return 0;
}
And here is the Pascal/asm version (Free Pascal, 64-bit, Linux), including the logic test.
{$ASMMODE intel}
{ Square-root micro-benchmark.

  Repeatedly takes the square root of four doubles, two at a time, with the
  packed SSE2 instruction SQRTPD. Whenever a value has decayed to (almost) 1
  it is bumped back up by the loop counter plus a fractional constant, so the
  chain keeps doing real work. At the end the sum of the four values is
  printed and checked against an expected window, followed by the elapsed
  time in milliseconds and a score. }
Uses sysutils;

Const // arbitrarily chosen seed values for the four square-root chains
  a: double = 911798473;
  aa: double = 143314345;
  aaa: double = 531432117;
  aaaa: double = 343211418;

Var
  b, bb, bbb, bbbb: double; // the four values that are repeatedly square-rooted
  startTick: QWord;         // raw tick count at the start of the timed section
  time1, score: single;     // elapsed milliseconds and the resulting benchmark score
  i: longword;              // loop counter

Begin
  Writeln(); // just printing an empty line

  // Seed the working variables with large values.
  b := a;
  bb := aa;
  bbb := aaa;
  bbbb := aaaa;

  sleep(100); // a 100 ms delay before the timer starts, so that any I/O has settled

  // Keep the raw tick count in an integer. A Single has only a 24-bit
  // significand, so storing the absolute tick count in it would round the
  // start time and distort the measured duration.
  startTick := GetTickCount64();

  for i := 1 to 100000000 do // 100 million iterations
  begin
    asm
      movlpd xmm1, b      // load "b" into the low half of xmm1
      movhpd xmm1, bb     // load "bb" into the high half of xmm1
      SQRTPD xmm1, xmm1   // square root of both halves with one packed instruction
      movlpd xmm2, bbb    // load "bbb" into the low half of xmm2
      movhpd xmm2, bbbb   // load "bbbb" into the high half of xmm2
      SQRTPD xmm2, xmm2   // square root of both halves
      movlpd b, xmm1      //
      movhpd bb, xmm1     // store all results back into the Pascal variables
      movlpd bbb, xmm2    //
      movhpd bbbb, xmm2   //
    end;

    { Plain Pascal equivalent that the asm above replaced:
        b:=sqrt(b);
        bb:=sqrt(bb);
        bbb:=sqrt(bbb);
        bbbb:=sqrt(bbbb);
      On the author's machine this takes ~4530 ms versus ~2240 ms for the asm. }

    // Bump b/bb/bbb/bbbb back to large values by adding the loop counter and
    // a fractional constant; otherwise finite precision would make every
    // value settle at 1 and the square roots would become trivial.
    if b <= 1.0000001 then b := b + i + 12.432432432;
    if bb <= 1.0000001 then bb := bb + i + 15.4324442;
    if bbb <= 1.0000001 then bbb := bbb + i + 19.42884;
    if bbbb <= 1.0000001 then bbbb := bbbb + i + 34.481;
  end;

  // Subtract in integer arithmetic; only the (small) difference becomes a Single.
  time1 := GetTickCount64() - startTick;

  // Score instead of raw time: calibrated so that a 10000 ms run earns
  // 1000 points; finishing 5 times faster (2000 ms) earns 5000 points.
  score := 10000000 / time1;

  Writeln();
  Write('Final number: ',b+bb+bbb+bbbb:0:22,' ');
  // Sanity check: the sum must land inside the expected window.
  if (b+bb+bbb+bbbb) > 4.0000032938759028 then Write('Result [INCORRECT - 4.0000032938759027 expected]');
  if (b+bb+bbb+bbbb) < 4.0000032938759026 then Write('Result [INCORRECT- 4.0000032938759027 expected]');
  Writeln();
  Writeln('Time elapsed: ',time1:0:0,' msecs.');   // elapsed time reported to the user
  Writeln('Score: ', FloatToStr(round(score)));    // score reported to the user
End.