Strings Delphi

Title: Speeding AnsiStrings tricks, and some code (2-Code)
Question: How AnsiStrings work, some tricks and reusable code to reduce unnecessary reallocations.
Answer:
This is the packaged code, for an intro, see Part I.
unit StrSysUtils;
// Written by: TheDelphiGuy, 2005
interface
uses
SysUtils;
// Low-level AnsiString stuff
//====================================================
function StrLenEqual( const Str1, Str2 : AnsiString ) : boolean; overload;
// A fast length comparison
procedure StrConcatInit( var aStr{ : AnsiString}; const InitStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload;
// Sets aStr to InitStr, initializes RealLen := length(InitStr), allocating as many [AllocBy]-sized chunks as needed.
// You'll normally call this one
procedure StrConcatInit( var aStr{ : AnsiString}; InitStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload;
// Same, with a pchar, so RealLen := [pos of #0 inside InitStr]
procedure StrConcatInit( var aStr{ : AnsiString}; var RealLen : integer; AllocBy : integer = 512 ); overload;
// Keeps the current value of aStr, initializes RealLen := length(aStr), allocating as many [AllocBy]-sized chunks as needed.
procedure StrConcat( var aStr{ : AnsiString}; const NewStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload;
// Usage Example:
// strConcatInit( wFullText, '', wLen ); { Init }
// for i := low( StrList ) to high( StrList ) do
// strConcat( wFullText, StrList[i], wLen ); { Concat & update wLen with real length }
// strSetLen( wFullText, wLen ); { Restore real length }
procedure StrConcat( var aStr{ : AnsiString}; ch : char; var RealLen : integer; AllocBy : integer = 512 ); overload;
procedure StrConcat( var aStr{ : AnsiString}; NewStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload;
// Same, but will concat a pchar
procedure StrInsert( var aStr{ : AnsiString}; const NewStr : AnsiString; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload;
// Like StrConcat, but inserting [NewStr] in [At] position
procedure StrInsert( var aStr{ : AnsiString}; NewStr : pchar; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload;
procedure strSetLen( var aStr {: AnsiString}; lenNew : cardinal );
// Simply updates the .strLen field, without reallocating memory. Should be faster than System.SetLength provided that:
// aStr is unique (RefCount = 1), *AND* we are reducing the length of the AnsiString
// NOTE: While this should reduce memory fragmentation, keep in mind that the extra characters remain allocated,
// so in some cases, where a lot of these strings are kept, you may want to revert to System.SetLength.
// Useful mainly for calls to the Windows API:
// ...
// SetLegth( Result, MAX_PATH );
// Count := WinApiCall( Result, MAX_PATH );
// strSetLen( Result, Count ); { Eliminates the reallocation of Result }
// ...
procedure strDecLen( var aStr {:AnsiString}; Delta : integer );
// See strSetLen
procedure StrAssertCapacity( var aStr{ : AnsiString}; RequiredSize : integer; AllocBy : integer = 512 );
// Makes sure [aStr] has room for at least [RequiredSize] characters
// if not, length is increased in as many [AllocBy]-sized chunks as needed
type // This is the structure found *BEFORE* string characters:
PUndocStrStruct = ^TUndocStrStruct;
TUndocStrStruct =
packed record
strRefCount : longint;
strLength : longint;
end;
const
strSkew = sizeof( TUndocStrStruct );
strOverhead = sizeof( TUndocStrStruct ) + 1; // Size of struct + the ending #0
// Quick Hashing
//====================================================
function strHashCut4( const aStr : AnsiString ) : integer; // Returns the first 4 chars as the hash
function strHashCut8( const aStr : AnsiString ) : integer; // Returns a combination of the first 8 chars as the hash
implementation
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Utility functions
//====================================================
function Sign( Num : integer ) : integer; // -1, 0, or 1
asm // On entry: EAX: Num
shl eax, 1
sbb eax, eax
lea eax, [eax + eax + 1]
end;
function NearestMultUp( Num, Step : integer ) : integer; overload;
begin
Result := Num + step - 1; // Num + sign( Num ) * ( step - 1 ); if Num can be Result := Result - ( Result mod Step );
end;
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Quick Hashing
//====================================================
function strHashCut4( const aStr : AnsiString ) : integer;
var
pchStr : pchar absolute aStr;
begin
case length( aStr ) of
0 :
Result := 0;
1, 2 : // Remember that AnsiStrings are #0 terminated,
Result := pword( pchStr )^; // so 'a' is really 'a'#0... which we can conveniently consider as a 2-char str
else
Result := pinteger( pchStr )^; // Same for a 3-char str
end;
end;
function strHashCut8( const aStr : AnsiString ) : integer;
var
pchStr : pchar absolute aStr;
begin
case length( aStr ) of
0 :
Result := 0;
1, 2 :
Result := pword( pchStr )^;
3, 4 :
Result := pinteger( pchStr )^;
5, 6 :
Result := pinteger( pchStr )^ xor pword( pchStr+4 )^;
else
Result := pinteger( pchStr )^ xor pinteger( pchStr+4 )^;
end;
end;
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//function StrStruct( const aStr : AnsiString ) : PUndocStrStruct;
// begin
// if integer( aStr ) 0
// then Result := PUndocStrStruct( integer( aStr ) - StrSkew )
// else Result := nil;
// end;
function StrLenEqual( const Str1, Str2 : AnsiString ) : boolean; overload;
asm // On entry: EAX= Str1, EDX = Str2
cmp eax, edx
je @ItsTrue // Str1 = Str2, including Str1 = Str2 = nil
or eax, eax // Str1 = ''
jz @ItsFalse
or edx, edx // Str2 = ''
jz @ItsFalse
mov ecx, [eax-strSkew].TUndocStrStruct.strLength
cmp ecx, [edx-strSkew].TUndocStrStruct.strLength
jne @ItsFalse
@ItsTrue:
or al, 1
ret
@ItsFalse:
xor eax, eax
end;
function strGetLen( const aStr : AnsiString ) : integer;
// begin
// if integer( aStr ) 0
// then Result := PUndocStrStruct( integer( aStr ) - StrSkew ).strLength
// else Result := 0;
// end;
asm // On entry: EAX= aStr
test eax, eax
je @Quit
mov eax, [eax-strSkew].TUndocStrStruct.strLength
@Quit:
end;
procedure strSetLen( var aStr {: AnsiString}; lenNew : cardinal );
{$IFOPT C+}
var
s : AnsiString absolute aStr;
pStr : PUndocStrStruct;
begin
if integer( aStr ) = 0
then SetLength( s, lenNew )
else
begin
pStr := PUndocStrStruct( integer( aStr ) - StrSkew );
assert( pStr.strRefCount = 1, 'Should NOT call strSetLen with shared strings (eg, strRefCount 1)!!' );
if cardinal( pStr.strLength ) lenNew
then
begin
pStr.strLength := lenNew;
pchar(aStr)[lenNew] := #0;
end;
end;
end;
{$ELSE}
asm // On entry: EAX= pointer to aStr, EDX = lenNew
mov ecx, [eax] // ECX = aStr
test ecx, ecx // Empty?
je @NormalCall
cmp edx, [ecx-strSkew].TUndocStrStruct.strLength
jae @NormalCall
mov [ecx-strSkew].TUndocStrStruct.strLength, edx
mov byte ptr [ecx+edx], 0 // Put the #0
ret
@NormalCall:
call System.@LStrSetLength
end;
{$ENDIF}
procedure strDecLen( var aStr {:AnsiString}; Delta : integer );
{$IFOPT C+}
var
s : AnsiString absolute aStr;
pStr : PUndocStrStruct;
begin
if integer( aStr ) = 0
then pStr := nil
else pStr := PUndocStrStruct( integer( aStr ) - StrSkew );
assert( pStr.strRefCount = 1, 'Should NOT call strSetLen with shared strings (eg, strRefCount 1)!!' );
assert( pStr.strLength 0, 'Should NOT call strDecLen with shared strings (eg, strRefCount 1)!!' );
if ( pStr nil ) and ( pStr.strLength = Delta )
then
begin
pStr.strLength := pStr.strLength - Delta;
pchar(aStr)[pStr.strLength] := #0;
end;
end;
{$ELSE}
asm // On entry: EAX= pointer to aStr, EDX = lenNew
mov ecx, [eax] // ECX = aStr
test ecx, ecx // Empty?
je @NormalCall
cmp edx, [ecx-strSkew].TUndocStrStruct.strLength
ja @NormalCall
sub [ecx-strSkew].TUndocStrStruct.strLength, edx
mov byte ptr [ecx+edx], 0 // Put the #0
ret
@NormalCall:
call System.@LStrSetLength
end;
{$ENDIF}
procedure StrAssertCapacity( var aStr {:AnsiString}; RequiredSize : integer; AllocBy : integer = 512 );
var
s : AnsiString absolute aStr;
newSize : integer;
begin
if length( s ) then
begin
newSize := RequiredSize + AllocBy - 1;
newSize := newSize - ( newSize mod AllocBy );
SetLength( s, newSize );
end;
end;
procedure StrConcatInit( var aStr{ : AnsiString}; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
begin
RealLen := 0;
SetLength( s, NearestMultUp( RealLen, AllocBy ) );
end;
procedure StrConcatInit( var aStr{ : AnsiString}; const InitStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
pchStr : pchar absolute aStr;
pchInitStr : pchar absolute InitStr;
begin
RealLen := length( InitStr );
SetLength( s, NearestMultUp( RealLen, AllocBy ) );
if pchInitStr pchStr
then Move( pchInitStr[0], pchStr[0], RealLen );
end;
procedure StrConcatInit( var aStr{ : AnsiString}; InitStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
pchStr : pchar absolute aStr;
begin
RealLen := length( InitStr );
SetLength( s, NearestMultUp( RealLen, AllocBy ) );
Move( InitStr[0], pchStr[0], RealLen );
end;
procedure StrConcat( var aStr{ : AnsiString}; NewStr : pchar; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
lenNewStr : integer;
begin
lenNewStr := StrLen( NewStr );
if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) );
Move( NewStr[0], pchar(aStr)[RealLen], lenNewStr + 1 );
inc( RealLen, lenNewStr );
end;
procedure StrConcat( var aStr{ : AnsiString}; ch : char; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
begin
if length( s ) then SetLength( s, RealLen + AllocBy );
inc( RealLen );
pchar(aStr)[RealLen] := ch;
end;
procedure StrConcat( var aStr{ : AnsiString}; const NewStr : AnsiString; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
lenNewStr : integer;
begin
lenNewStr := length( NewStr );
if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) );
Move( pchar(NewStr)[0], (pchar(aStr) + RealLen)[0], lenNewStr + 1 );
inc( RealLen, lenNewStr );
end;
procedure StrInsert( var aStr{ : AnsiString}; NewStr : pchar; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
lenNewStr : integer;
begin
lenNewStr := StrLen( NewStr );
if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) );
Move( pchar(aStr)[At-1], pchar(aStr)[lenNewStr + At-1], RealLen - At + 1 );
Move( pchar(NewStr)[0], pchar(aStr)[At], lenNewStr );
inc( RealLen, lenNewStr );
end;
procedure StrInsert( var aStr{ : AnsiString}; const NewStr : AnsiString; At : integer; var RealLen : integer; AllocBy : integer = 512 ); overload;
var
s : AnsiString absolute aStr;
lenNewStr : integer;
begin
lenNewStr := length( NewStr );
if length( s ) then SetLength( s, NearestMultUp( RealLen + lenNewStr, AllocBy ) );
Move( pchar(aStr)[At-1], pchar(aStr)[lenNewStr + At-1], RealLen - At + 1 );
Move( pchar(NewStr)[0], pchar(aStr)[At], lenNewStr );
inc( RealLen, lenNewStr );
end;
end.