Algorithm Math Delphi

Özellikle grafik-matematik işlemleri yapıyorsanız hızlı işlem yapmanız gerekir.
Hız sağlanabilmesi için iki önemli bileşen: ASSEMBLER ve REGISTER
Asembler ana dili sayılır. Register ise yazdığınız program parçasının
CPU nun cashe belleğinde saklanmasını sağlar..Böylece program parçacığınız
çok hızlı çalıştırılmış (program parçasına daha kısa sürede erişilmiş) olur.
Dikkat!!! Çok hızlı çalışması gereken küçük
program parçacıklarınızı(prosedür,fonksiyon) register olarak tanıtın. Aksi taktirde hız yerine
yavaşlık elde etmiş olursunuz.Çünkü cashe belleğinde bir sınırı var...(128,256,512 kb,...)
Hız farkını denemeyi unutmayın!!! kendi gözünüzle görün
daha çok matematiksel işlemlerde kullanabilirsiniz.
register olarak tanımlamak için:
procedure proc_adi(); register;
function Func_adi(): func_turu; register;
normalde tanımladığınız fonksiyonlar ve prosedürler stdcall olarak tanımlanmış sayılır.
function LnXP1(X: Extended): Extended;
asm
FLDLN2
MOV AX,WORD PTR X+8 { exponent }
FLD X
CMP AX,$3FFD { .4225 }
JB @@1
FLD1
FADD
FYL2X
JMP @@2
@@1:
FYL2XP1
@@2:
FWAIT
end;
// Log10
//
function Log10(X: Extended): Extended;
// Log.10(X):=Log.2(X) * Log.10(2)
asm
FLDLG2 { Log base ten of 2 }
FLD X
FYL2X
end;
// Log2
//
function Log2(X: Extended): Extended;
asm
FLD1
FLD X
FYL2X
end;
// Log2
//
function Log2(X: Single): Single;
asm
FLD1
FLD X
FYL2X
end;
// LogN
//
function LogN(Base, X: Extended): Extended;
// Log.N(X):=Log.2(X) / Log.2(N)
asm
FLD1
FLD X
FYL2X
FLD1
FLD Base
FYL2X
FDIV
end;
// IntPower
//
function IntPower(Base: Extended; Exponent: Integer): Extended; register;
asm
mov ecx, eax
cdq
fld1 { Result:=1 }
xor eax, edx
sub eax, edx { eax:=Abs(Exponent) }
jz @@3
fld Base
jmp @@2
@@1: fmul ST, ST { X:=Base * Base }
@@2: shr eax,1
jnc @@1
fmul ST(1),ST { Result:=Result * X }
jnz @@1
fstp st { pop X from FPU stack }
cmp ecx, 0
jge @@3
fld1
fdivrp { Result:=1 / Result }
@@3:
fwait
end;
// Power
//
function Power(const Base, Exponent: Single): Single;
begin
if Exponent=cZero then
Result:=cOne { n**0 = 1 }
else if (Base=cZero) and (Exponent>cZero) then
Result:=cZero { 0**n = 0, n > 0 }
else Result:=Exp(Exponent * Ln(Base));
end;
// Power (int exponent)
//
function Power(Base: Single; Exponent: Integer): Single;
asm
mov ecx, eax
cdq
fld1 { Result:=1 }
xor eax, edx
sub eax, edx { eax:=Abs(Exponent) }
jz @@3
fld Base
jmp @@2
@@1: fmul ST, ST { X:=Base * Base }
@@2: shr eax,1
jnc @@1
fmul ST(1),ST { Result:=Result * X }
jnz @@1
fstp st { pop X from FPU stack }
cmp ecx, 0
jge @@3
fld1
fdivrp { Result:=1 / Result }
@@3:
end;
// DegToRad (extended)
//
function DegToRad(const Degrees: Extended): Extended;
begin
Result:=Degrees * (PI / 180);
end;
// DegToRad (single)
//
function DegToRad(const Degrees : Single) : Single; register;
// Result:=Degrees * cPIdiv180;
// don't laugh, Delphi's compiler manages to make a nightmare of this one
// with pushs, pops, etc. in its default compile... (this one is twice faster !)
asm
FLD DWORD PTR [EBP+8]
FMUL cPIdiv180
end;
// RadToDeg (extended)
//
function RadToDeg(const Radians: Extended): Extended;
begin
Result:=Radians * (180 / PI);
end;
// RadToDeg (single)
//
function RadToDeg(const Radians: Single): Single;
// Result:=Radians * c180divPI;
// don't laugh, Delphi's compiler manages to make a nightmare of this one
// with pushs, pops, etc. in its default compile... (this one is twice faster !)
asm
FLD DWORD PTR [EBP+8]
FMUL c180divPI
end;
// NormalizeAngle
//
function NormalizeAngle(angle : Single) : Single;
begin
Result:=angle-Int(angle*cInv2PI)*c2PI;
if Result>PI then
Result:=Result-2*PI
else if Result<-PI then
Result:=Result+2*PI;
end;
// NormalizeDegAngle
//
function NormalizeDegAngle(angle : Single) : Single;
begin
Result:=angle-Int(angle*cInv360)*c360;
if Result>c180 then
Result:=Result-c360
else if Result<-c180 then
Result:=Result+c360;
end;
// SinCos (Extended)
//
procedure SinCos(const Theta: Extended; var Sin, Cos: Extended); register;
// EAX contains address of Sin
// EDX contains address of Cos
// Theta is passed over the stack
asm
FLD Theta
FSINCOS
FSTP TBYTE PTR [EDX] // cosine
FSTP TBYTE PTR [EAX] // sine
end;
// SinCos (Double)
//
procedure SinCos(const Theta: Double; var Sin, Cos: Double); register;
// EAX contains address of Sin
// EDX contains address of Cos
// Theta is passed over the stack
asm
FLD Theta
FSINCOS
FSTP QWORD PTR [EDX] // cosine
FSTP QWORD PTR [EAX] // sine
end;
// SinCos (Single)
//
procedure SinCos(const Theta: Single; var Sin, Cos: Single); register;
// EAX contains address of Sin
// EDX contains address of Cos
// Theta is passed over the stack
asm
FLD Theta
FSINCOS
FSTP DWORD PTR [EDX] // cosine
FSTP DWORD PTR [EAX] // sine
end;
// SinCos (Extended w radius)
//
procedure SinCos(const theta, radius : Double; var Sin, Cos: Extended); register;
// EAX contains address of Sin
// EDX contains address of Cos
// Theta is passed over the stack
asm
FLD theta
FSINCOS
FMUL radius
FSTP TBYTE PTR [EDX] // cosine
FMUL radius
FSTP TBYTE PTR [EAX] // sine
end;
// SinCos (Double w radius)
//
procedure SinCos(const theta, radius : Double; var Sin, Cos: Double); register;
// EAX contains address of Sin
// EDX contains address of Cos
// Theta is passed over the stack
asm
FLD theta
FSINCOS
FMUL radius
FSTP QWORD PTR [EDX] // cosine
FMUL radius
FSTP QWORD PTR [EAX] // sine
end;
// SinCos (Single w radius)
//
procedure SinCos(const theta, radius : Single; var Sin, Cos: Single); register;
// EAX contains address of Sin
// EDX contains address of Cos
// Theta is passed over the stack
asm
FLD theta
FSINCOS
FMUL radius
FSTP DWORD PTR [EDX] // cosine
FMUL radius
FSTP DWORD PTR [EAX] // sine
end;
// PrepareSinCosCache
//
procedure PrepareSinCosCache(var s, c : array of Single;
startAngle, stopAngle : Single);
var
i : Integer;
d, alpha, beta : Single;
begin
Assert((High(s)=High(c)) and (Low(s)=Low(c)));
stopAngle:=stopAngle+1e-5;
if High(s)>Low(s) then
d:=cPIdiv180*(stopAngle-startAngle)/(High(s)-Low(s))
else d:=0;
if High(s)-Low(s)<1000 then begin
// Fast computation (approx 5.5x)
alpha:=2*Sqr(Sin(d*0.5));
beta:=Sin(d);
SinCos(startAngle, s[Low(s)], c[Low(s)]);
for i:=Low(s) to High(s)-1 do begin
// Make use of the incremental formulae:
// cos (theta+delta) = cos(theta) - [alpha*cos(theta) + beta*sin(theta)]
// sin (theta+delta) = sin(theta) - [alpha*sin(theta) - beta*cos(theta)]
c[i+1]:= c[i] - alpha * c[i] - beta * s[i];
s[i+1]:= s[i] - alpha * s[i] + beta * c[i];
end;
end else begin
// Slower, but maintains precision when steps are small
for i:=Low(s) to High(s) do
SinCos((i-Low(s))*d+startAngle, s[i], c[i]);
end;
end;
// ArcCos (Extended)
//
function ArcCos(const x : Extended): Extended;
begin
Result:=ArcTan2(Sqrt(1 - Sqr(X)), X);
end;
// ArcCos (Single)
//
function ArcCos(const x : Single): Single; register;
// Result:=ArcTan2(Sqrt(c1 - X * X), X);
asm
FLD X
FMUL ST, ST
FSUBR cOne
FSQRT
FLD X
FPATAN
end;
// ArcSin (Extended)
//
function ArcSin(const x : Extended) : Extended;
begin
Result:=ArcTan2(X, Sqrt(1 - Sqr(X)))
end;
// ArcSin (Single)
//
function ArcSin(const x : Single) : Single;
// Result:=ArcTan2(X, Sqrt(1 - X * X))
asm
FLD X
FLD ST
FMUL ST, ST
FSUBR cOne
FSQRT
FPATAN
end;
// ArcTan2 (Extended)
//
function ArcTan2(const y, x : Extended) : Extended;
asm
FLD Y
FLD X
FPATAN
end;
// ArcTan2 (Single)
//
function ArcTan2(const y, x : Single) : Single;
asm
FLD Y
FLD X
FPATAN
end;
// Tan (Extended)
//
function Tan(const x : Extended) : Extended;
asm
FLD X
FPTAN
FSTP ST(0) // FPTAN pushes 1.0 after result
end;
// Tan (Single)
//
function Tan(const x : Single) : Single;
asm
FLD X
FPTAN
FSTP ST(0) // FPTAN pushes 1.0 after result
end;
// CoTan (Extended)
//
function CoTan(const x : Extended) : Extended;
asm
FLD X
FPTAN
FDIVRP
end;
// CoTan (Single)
//
function CoTan(const x : Single) : Single;
asm
FLD X
FPTAN
FDIVRP
end;
// RSqrt
//
function RSqrt(v : Single) : Single;
asm
test vSIMD, 1
jz @@FPU
@@3DNow:
lea eax, [ebp+8]
db $0F,$6E,$00 /// movd mm0, [eax]
db $0F,$0F,$C8,$97 /// pfrsqrt mm1, mm0
db $0F,$6F,$D1 /// movq mm2, mm1
db $0F,$0F,$C9,$B4 /// pfmul mm1, mm1
db $0F,$0F,$C8,$A7 /// pfrsqit1 mm1, mm0
db $0F,$0F,$CA,$B6 /// pfrcpit2 mm1, mm2
db $0F,$7E,$08 /// movd [eax], mm1
db $0F,$0E /// femms
fld dword ptr [eax]
jmp @@End
@@FPU:
fld v
fsqrt
fld1
fdivr
@@End:
end;
// ISqrt
//
function ISqrt(i : Integer) : Integer; register;
//begin
// Result:=Round(Sqrt(i));
asm
push eax
test vSIMD, 1
jz @@FPU
@@3DNow:
db $0F,$6E,$04,$24 /// movd mm0, [esp]
db $0F,$0F,$C8,$0D /// pi2fd mm1, mm0
db $0F,$0F,$D1,$97 /// pfrsqrt mm2, mm1
db $0F,$0F,$DA,$96 /// pfrcp mm3, mm2
db $0F,$0F,$E3,$1D /// pf2id mm4, mm3
db $0F,$7E,$24,$24 /// movd [esp], mm4
db $0F,$0E /// femms
pop eax
ret
@@FPU:
fild dword ptr [esp]
fsqrt
fistp dword ptr [esp]
pop eax
end;
// ILength
//
function ILength(x, y : Integer) : Integer;
asm
push edx
push eax
fild dword ptr [esp]
fmul ST(0), ST(0)
fild dword ptr [esp+4]
fmul ST(0), ST(0)
faddp
fsqrt
fistp dword ptr [esp+4]
pop edx
pop eax
end;
// ILength
//
function ILength(x, y, z : Integer) : Integer;
asm
push ecx
push edx
push eax
fild dword ptr [esp]
fmul ST(0), ST(0)
fild dword ptr [esp+4]
fmul ST(0), ST(0)
faddp
fild dword ptr [esp+8]
fmul ST(0), ST(0)
faddp
fsqrt
fistp dword ptr [esp+8]
pop ecx
pop edx
pop eax
end;
// RLength
//
function RLength(x, y : Single) : Single;
asm
test vSIMD, 1
jz @@FPU
@@3DNow:
{ test vSIMD, 1
jz @@FPU
@@3DNow:
movd mm0, dword ptr [ebp+8]
punpckldq mm0, qword ptr [ebp+12]
pfmul mm0, mm0
pfacc mm0, mm0
pfrsqrt mm1, mm0
movq mm2, mm1
pfmul mm2, mm1
pfrsqit1 mm0, mm2
pfrcpit2 mm0, mm1
movd dword ptr [ebp-4], mm0
femms
fld dword ptr [ebp-4]
jmp @@End }
@@FPU:
fld x
fmul x
fld y
fmul y
fadd
fsqrt
fld1
fdivr
@@End:
end;
// RandomPointOnSphere
//
procedure RandomPointOnSphere(var p : TAffineVector);
var
t, w : Single;
begin
p[2]:=2*Random-1;
t:=2*PI*Random;
w:=Sqrt(1-p[2]*p[2]);
SinCos(t, w, p[1], p[0]);
end;
// Trunc64 (extended)
//
function Trunc64(v : Extended) : Int64; register;
asm
SUB ESP,12
FSTCW [ESP]
FLDCW cwChop
FLD v
FISTP qword ptr [ESP+4]
FLDCW [ESP]
POP ECX
POP EAX
POP EDX
end;
// Trunc (single)
//
function Trunc(v : Single) : Integer; register;
asm
SUB ESP,8
FSTCW [ESP]
FLDCW cwChop
FLD v
FISTP dword ptr [ESP+4]
FLDCW [ESP]
POP ECX
POP EAX
end;
// Int (Extended)
//
function Int(v : Extended) : Extended;
asm
SUB ESP,4
FSTCW [ESP]
FLDCW cwChop
FLD v
FRNDINT
FLDCW [ESP]
ADD ESP,4
end;
// Int (Single)
//
function Int(v : Single) : Single;
asm
SUB ESP,4
FSTCW [ESP]
FLDCW cwChop
FLD v
FRNDINT
FLDCW [ESP]
ADD ESP,4
end;
// Frac (Extended)
//
function Frac(v : Extended) : Extended;
asm
SUB ESP,4
FSTCW [ESP]
FLDCW cwChop
FLD v
FLD ST
FRNDINT
FSUB
FLDCW [ESP]
ADD ESP,4
end;
// Frac (Extended)
//
function Frac(v : Single) : Single;
asm
SUB ESP,4
FSTCW [ESP]
FLDCW cwChop
FLD v
FLD ST
FRNDINT
FSUB
FLDCW [ESP]
ADD ESP,4
end;
// Round64 (Extended);
//
function Round64(v : Extended) : Int64; register;
asm
SUB ESP,8
FLD v
FISTP qword ptr [ESP]
POP EAX
POP EDX
end;
// Round (Single);
//
function Round(v : Single) : Integer; register;
asm
SUB ESP,4
FLD v
FISTP dword ptr [ESP]
POP EAX
end;
// Ceil64 (Extended)
//
function Ceil64(v : Extended) : Int64; overload;
begin
if Frac(v)>0 then
Result:=Trunc(v)+1
else Result:=Trunc(v);
end;
// Ceil (Single)
//
function Ceil(v : Single) : Integer; overload;
begin
if Frac(v)>0 then
Result:=Trunc(v)+1
else Result:=Trunc(v)
end;
// Floor64 (Extended)
//
function Floor64(v : Extended) : Int64; overload;
begin
if Frac(v)<0 then
Result:=Trunc(v)-1
else Result:=Trunc(v);
end;
// Floor (Single)
//
function Floor(v : Single) : Integer; overload;
begin
if Frac(v)<0 then
Result:=Trunc(v)-1
else Result:=Trunc(v);
end;
// Sign
//
function Sign(x : Single) : Integer;
begin
if x<0 then
Result:=-1
else if x>0 then
Result:=1
else Result:=0;
end;
function IsInRange(const x, a, b : Single) : Boolean;
begin
if a Result:=(a<=x) and (x<=b)
else Result:=(b<=x) and (x<=a);
end;
// IsInCube (affine)
//
function IsInCube(const p, d : TAffineVector) : Boolean; overload;
begin
Result:= ((p[0]>=-d[0]) and (p[0]<=d[0]))
and ((p[1]>=-d[1]) and (p[1]<=d[1]))
and ((p[2]>=-d[2]) and (p[2]<=d[2]));
end;
// IsInCube (hmg)
//
function IsInCube(const p, d : TVector) : Boolean; overload;
begin
Result:= ((p[0]>=-d[0]) and (p[0]<=d[0]))
and ((p[1]>=-d[1]) and (p[1]<=d[1]))
and ((p[2]>=-d[2]) and (p[2]<=d[2]));
end;
// MinFloat (single)
//
function MinFloat(values : PSingleArray; nbItems : Integer) : Single;
var
i : Integer;
begin
if nbItems>0 then begin
Result:=values[0];
for i:=1 to nbItems-1 do
if values[i] end else Result:=0;
end;
// MinFloat (double)
//
function MinFloat(values : PDoubleArray; nbItems : Integer) : Double;
var
i : Integer;
begin
if nbItems>0 then begin
Result:=values[0];
for i:=1 to nbItems-1 do
if values[i] end else Result:=0;
end;
// MinFloat (extended)
//
function MinFloat(values : PExtendedArray; nbItems : Integer) : Extended;
var
i : Integer;
begin
if nbItems>0 then begin
Result:=values[0];
for i:=1 to nbItems-1 do
if values[i] end else Result:=0;
end;
// MinFloat (single 2)
//
function MinFloat(const v1, v2 : Single) : Single;
begin
if v1 Result:=v1
else Result:=v2;
end;
// MaxFloat (single)
//
function MaxFloat(values : PSingleArray; nbItems : Integer) : Single; overload;
var
i : Integer;
begin
if nbItems>0 then begin
Result:=values[0];
for i:=1 to nbItems-1 do
if values[i]>Result then Result:=values[i];
end else Result:=0;
end;
// MaxFloat (double)
//
function MaxFloat(values : PDoubleArray; nbItems : Integer) : Double; overload;
var
i : Integer;
begin
if nbItems>0 then begin
Result:=values[0];
for i:=1 to nbItems-1 do
if values[i]>Result then Result:=values[i];
end else Result:=0;
end;
// MaxFloat (extended)
//
function MaxFloat(values : PExtendedArray; nbItems : Integer) : Extended; overload;
var
i : Integer;
begin
if nbItems>0 then begin
Result:=values[0];
for i:=1 to nbItems-1 do
if values[i]>Result then Result:=values[i];
end else Result:=0;
end;
// MaxFloat
//
function MaxFloat(const v1, v2 : Single) : Single;
begin
if v1>v2 then
Result:=v1
else Result:=v2;
end;
// MaxFloat
//
function MaxFloat(const v1, v2 : Double) : Double;
begin
if v1>v2 then
Result:=v1
else Result:=v2;
end;
// MaxFloat
//
function MaxFloat(const v1, v2 : Extended) : Extended;
begin
if v1>v2 then
Result:=v1
else Result:=v2;
end;
// MaxFloat
//
function MaxFloat(const v1, v2, v3 : Single) : Single;
begin
if v1>=v2 then
if v1>=v3 then
Result:=v1
else if v3>=v2 then
Result:=v3
else Result:=v2
else if v2>=v3 then
Result:=v2
else if v3>=v1 then
Result:=v3
else result:=v1;
end;
// MaxFloat
//
function MaxFloat(const v1, v2, v3 : Double) : Double;
begin
if v1>=v2 then
if v1>=v3 then
Result:=v1
else if v3>=v2 then
Result:=v3
else Result:=v2
else if v2>=v3 then
Result:=v2
else if v3>=v1 then
Result:=v3
else result:=v1;
end;
// MaxFloat
//
function MaxFloat(const v1, v2, v3 : Extended) : Extended;
begin
if v1>=v2 then
if v1>=v3 then
Result:=v1
else if v3>=v2 then
Result:=v3
else Result:=v2
else if v2>=v3 then
Result:=v2
else if v3>=v1 then
Result:=v3
else result:=v1;
end;
procedure ScaleFloatArray(values : PSingleArray; nb : Integer;
var factor : Single); register;
asm
test vSIMD, 1
jz @@FPU
push edx
shr edx, 2
or edx, edx
jz @@FPU
db $0F,$6E,$39 /// movd mm7, [ecx]
db $0F,$62,$FF /// punpckldq mm7, mm7
@@3DNowLoop:
db $0F,$0D,$48,$40 /// prefetchw [eax+64]
db $0F,$6F,$00 /// movq mm0, [eax]
db $0F,$6F,$48,$08 /// movq mm1, [eax+8]
db $0F,$0F,$C7,$B4 /// pfmul mm0, mm7
db $0F,$0F,$CF,$B4 /// pfmul mm1, mm7
db $0F,$7F,$00 /// movq [eax], mm0
db $0F,$7F,$48,$08 /// movq [eax+8], mm1
add eax, 16
dec edx
jnz @@3DNowLoop
pop edx
and edx, 3
db $0F,$0E /// femms
@@FPU:
push edx
shr edx, 1
or edx, edx
jz @@FPULone
@@FPULoop:
fld dword ptr [eax]
fmul dword ptr [ecx]
fstp dword ptr [eax]
fld dword ptr [eax+4]
fmul dword ptr [ecx]
fstp dword ptr [eax+4]
add eax, 8
dec edx
jnz @@FPULoop
@@FPULone:
pop edx
test edx, 1
jz @@End
fld dword ptr [eax]
fmul dword ptr [ecx]
fstp dword ptr [eax]
@@End:
end;
// ScaleFloatArray (array)
//
procedure ScaleFloatArray(var values : TSingleArray;
factor : Single);
begin
if Length(values)>0 then
ScaleFloatArray(@values[0], Length(values), factor);
end;
initialization
//--------------------------------------------------------------
//--------------------------------------------------------------
//--------------------------------------------------------------
try
// detect 3DNow! capable CPU (adapted from AMD's "3DNow! Porting Guide")
asm
pusha
mov eax, $80000000
db $0F,$A2 /// cpuid
cmp eax, $80000000
jbe @@No3DNow
mov eax, $80000001
db $0F,$A2 /// cpuid
test edx, $80000000
jz @@No3DNow
mov vSIMD, 1
@@No3DNow:
popa
end;
except
// trap for old/exotics CPUs
vSIMD:=0;
end;
function GeometryOptimizationMode : String;
begin
case vSIMD of
0 : Result:='FPU';
1 : Result:='3DNow!';
2 : Result:='SSE';
else
Result:='*ERR*';
end;
end;
var
// this var is adjusted during "initialization", current values are
// + 0 : use standard optimized FPU code
// + 1 : use 3DNow! optimized code (requires K6-2/3 CPU)
// + 2 : use Intel SSE code (Pentium III, NOT IMPLEMENTED YET !)
vSIMD : Byte = 0;
var
vOldSIMD : Byte;
vFPUOnlySectionCounter : Integer;
procedure BeginFPUOnlySection;
begin
if vFPUOnlySectionCounter=0 then
vOldSIMD:=vSIMD;
Inc(vFPUOnlySectionCounter);
vSIMD:=0;
end;
// EndFPUOnlySection
//
procedure EndFPUOnlySection;
begin
Dec(vFPUOnlySectionCounter);
Assert(vFPUOnlySectionCounter>=0);
if vFPUOnlySectionCounter=0 then
vSIMD:=vOldSIMD;
end;