Title: Fuzzy Matching Strings
Question: How to get an idea of how closely 2 strings match
Answer:
unit FuzzyMatch;
{This unit provides a basic 'fuzzy match' index on how alike two strings are
The result is of type 'single': near 0 - poor match
near 1 - close match
The intention is that HowAlike(s1,s2)=HowAlike(s2,s1)
The Function is not case sensitive}
interface
uses sysutils;
function HowAlike(s1,s2:string):single;
implementation
function instr(start:integer;ToSearch,ToFind:string):integer;
begin
//This is a quick implementation of the VB InStr, since Pos just doesn't do what is needed!!
//NB - case sensitive!!
if start1 then Delete(ToSearch,1,start-1);
result:=pos(ToFind,ToSearch);
if (result0) and (start1) then inc(result,start);
end;
function HowAlike(s1,s2:string):single;
var l1,l2,pass,position,size,foundpos,maxscore:integer;
score,scored,string1pos,string2pos,bestmatchpos:single;
swapstring,searchblock:string;
begin
s1:=Uppercase(trim(s1));
s2:=Uppercase(trim(s2));
score:=0;
maxscore:=0;
scored:=0;
//deal with zero length strings...
if (s1='') and (s2='') then
begin
result:=1;
exit;
end
else
if (s1='') or (s2='') then
begin
result:=0;
exit;
end;
//why perform any mathematics is the result is clear?
if s1=s2 then
begin
result:=1;
exit;
end;
//make two passes,
// with s1 and s2 each way round to ensure
// consistent results
for pass:=1 to 2 do
begin
l1:=length(s1);
l2:=length(s2);
for size:=l1 downto 1 do
begin
for position:=1 to (l1-size+1) do
begin
//try to find implied block in the other string
//Big blocks score much better than small blocks
searchblock:=copy(s1,position,size);
foundpos:=pos(searchblock,s2);
if size=l1 then
string1pos:=0.5
else
string1pos:=(position-1)/(l1-size);
if foundpos0 then
begin
//the string is in somewhere in there
// - find the 'closest' one.
bestmatchpos:=-100; //won't find anything that far away!
repeat
if size=l2 then
string2pos:=0.5
else
string2pos:=(foundpos-1)/(l2-size);
//If this closer than the previous best?
if abs(string2pos-string1pos) bestmatchpos:=string2pos;
foundpos:=instr(foundpos+1,s2,searchblock);
until foundpos=0; //loop while foundpos0..
//The closest position is now known: Score it!
//Score as follows: (1-distance of best match)
score:=score+(1-abs(string1pos-bestmatchpos));
end;
//Keep track if the maximum possible score
//BE CAREFUL IF CHANGING THIS FUNCTION!!!
//maxscore:=maxscore+1;
inc(maxscore);
end; //for position..
end; //for size..
if pass=1 then
begin
//swap the strings around
swapstring:=s1;
s1:=s2;
s2:=swapstring;
end;
//Each pass is weighted equally
scored:=scored+(0.5*(score/maxscore));
score:=0;
maxscore:=0;
end; //for pass..
//HowAlike=score/maxscore
result:=scored;
end;
end.