{'SEARCH' module for WRITER'S TOOLKIT package...}
{A program to implement a fast text search algorithm for use with LARGE
files. The search routine uses a variation of the Boyer-Moore Search
Algorithm (adapted by the autho). The program deals with large files
by searching a piece at a time. The 'pieces' exist as buffers in memory,
and since the search algorithm involves 'backing up' in the file from
time to time, this is taken into account when loading the next n source
file bytes into the buffer (so we backtrack searchPattLen characters in
the source file before reading the next buffer)...
NOTE THAT THIS IS one UNIT TAKEN FROM A COMPLETE PROGRAM 'AS IS', without
editing, without stripping out the lines of code that don't compile, so
if you want to use the GOOD code that's here it's up to you to strip out
out what you need, etc... but I promise you, this should not be difficult!!!}
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
StdCtrls, IniFiles;
const
{remember to always put your const declarations before your type
declarations in this section of a Delphi Pascal unit...}
{declare maximum size of buffer to be 63K, which is 64512 bytes.
Disk cluster sizes are usually multiples of 1024 bytes, so making
maxBufferSize also a multiple of 1024 can help speed up disc reads}
maxBufferSize = 1024 * 63;
{an 'master' constant to determine max length of edit box input,
the length of lines in the Search Results window, and also the
length of 'chunks' of text extracted from source files...}
maxLineLen = 82;
{maximum allowable length of search pattern input by the user...}
maxInputLen = maxLineLen;
{length of data 'chunk' to extract from the source string at the
position of a found match: the chunkLen variable is used when we
pull out a chunk of characters from the source file at the
position where a match is found -note that chunkLen must be less
than maxInputLen...}
chunkLen = maxLineLen;
{the next constant is used to limit searches to no more than 2000
'hits' -the assumption is that users i) don't want to sit and wait
while the search algorithm finds every instance of 'the' in all of
Shakespeare, ii) 2000 hits is already an intractably large amount
from the user's point of view...}
maxFoundMatches = 2000;
{define a constant for the 'maximum ASCII index', bearing in mind that
the ASCII table starts at 0 and ends at 255...}
maxASCindex = 255;
{in Delphi string indexes go from 1 to 255 by default, unless you want
longer strings in which case you can set a compiler switch for that...}
maxStringLen = 255;
helpFileName = 'Search.hlp';
iniFileName = 'WTSearch.ini';
type
{define a buffer as a character array type of length maxBufferSize...}
TSearchBufferArray = array[1..maxBufferSize] of char;
{and define a pointer type to point into the buffer}
TSearchBuffer = ^TSearchBufferArray;
TMatchInfo = record
fileNum: Integer;
filePos: LongInt;
fileSize: LongInt;
end;
TVisibleChars = set of Char;
type
TSrchForm = class(TForm)
SDomainButton: TButton;
PromptLabel: TLabel;
PatternEdit: TEdit;
SearchButton: TButton;
CloseButton: TButton;
HelpButton: TButton;
procedure PutClipTextInEditBox;
procedure FormCreate(Sender: TObject);
procedure FormClose(Sender: TObject; var Action: TCloseAction);
procedure FormDestroy(Sender: TObject);
procedure CloseButtonClick(Sender: TObject);
function CreateBuffer: Boolean;
procedure DestroyBuffer;
procedure InitSkipArray;
procedure InitMInfoArray;
procedure InitialiseSearch;
procedure ProcessInputText;
procedure SeparateANDInput;
function CheckPlusInputOK: Boolean;
procedure SearchButtonClick(Sender: TObject);
procedure AddFileSize(fileNumber: Integer; fileSize: Integer);
function LoadSourceFile(srcFileName: String): String;
procedure OverwriteDuffBufferChars;
function GetError(const ErrorCode: Integer): String;
procedure DefaultSearch;
procedure ANDSearch;
procedure CapitaliseBuffer;
procedure GetChunk(bufPos: LongInt);
function EditLine(tempLine: array of Char; i: Integer): String;
procedure SearchButtonKeyDown(Sender: TObject; var Key: Word;
Shift: TShiftState);
procedure SDomainButtonClick(Sender: TObject);
procedure EndSearch;
procedure HelpButtonClick(Sender: TObject);
procedure ReadINIFile;
procedure WriteINIFile;
private
{private declarations}
{all variables declared here are global to THIS unit...}
{define a buffer variable (which is of pointer type, don't forget)...}
buffer : TSearchBuffer;
{these other variables are 'global' too of course...}
{declare a 'skip' array -see psuedocode to understand it's function...}
skip: array[0..maxASCindex] of Integer;
{next variable, in conjunction with the chunkLen constant,
helps us decide how much text to extract from the source file
at ther position of a found match...}
halfaChunk: Integer;
{the numMatches variable has to be global since we need to keep track
of the number of hits found during a search, and in the process of
searching we may call a search routine many many times, so that having
numMatches local to Search() wouldn't be too clever, would it now...}
numMatches: Integer;
{next variable is used if a user wants to interrupt a search by pressing
the ESC key...}
stopSearch: Boolean;
{next variable used to keep track of the identity of the current file}
currFileNum: Integer;
{and this one to keep track of it's size...}
currFileSize: LongInt;
{see the SeparateANDInput routine for the way we use VisibleChars}
VisibleChars: TVisibleChars;
public
{public declarations...}
{declare a string for the search pattern -first index is at pos 1 remember}
searchPatt: String;
{and a similar one for 'AND' searches (where the pattern is in two parts)..}
searchPattTwo: String;
{declare a variable to hold the length of the search pattern- we use
this in various places including the search routine, obviously, but we
also use it to backtrack pattLen chars in the source file before loading
a new buffer into memory -this ensures that the search algorithm does
not miss any matches at the join between buffers as backtracking is
involved in the search process...}
pattLen: Integer;
{and a similar one for 'AND' searches (where the pattern is in two parts)..}
pattLenTwo: Integer;
{a variable to help keep track of different kinds of searches...}
searchType: String;
{next a list of strings to hold the names of all user-selected
files in the 'search domain'...}
fileList: TStringList;
{this 'match info array' will be used to hold reference info on i) the
source file(name) ii) file position for each found match, and iii) the
size of each source file we scan -we can find the name by using the file
'number' as an index into a file list...}
mInfoArray: array[0..maxFoundMatches] of TMatchInfo;
{next variables help define the number of characters
separating words in 'AND' searches...}
ANDsearchWidth: Integer;
defaultANDsearchWidth: Integer;
{Boolean variable to help keep track of whether or not current search is
'case-sensitive' or 'case-insensitive' -the DEFAULT is case-sensitive...}
caseInSensitive: Boolean;
end;
var
SrchForm: TSrchForm;
implementation
uses SDUnit, PageUnit, SREUnit;
{NOTES ON OTHER UNITS: the ResultsForm holds a Memo component, -earlier version
used a RichEdit which is like a Memo but which allowed greater control at
run-time, of font styles in particular- however, it's easier to synchronise
scrolling between two Memos than it is between a Memo and a RichEdit. The
Memo component has the following properties set: ReadOnly is True,
ScrollBars is ssVertical, and the Font is 12 point...}
{NOTES ON OTHER UNITS: the SDForm (for Search Domain form) holds a FileListBox,
a DriveComboBox, a DirectoryListBox, and a FilterComboBox. These are set up to
talk to each other, allowing the user to specify a number of files to search
in, so they can add or subtract files as necessary...}
{NOTES ON OTHER UNITS: the PageForm holds the PageMemo which is used to
display a whole page of source text if the user double-clicks on a line
in the 'search results' window...}
{$R *.DFM}
procedure TSrchForm.PutClipTextInEditBox;
{here we simply put whatever text the user has selected in the main
'WP/Text-Editor' window INTO the Edit Box of the Search input form, leaving
the user to provide a Search Domain before they can proceed...}
var
inputOK: Boolean;
begin
{get text FROM the Clipboard...}
PatternEdit.Clear;
PatternEdit.PasteFromClipboard;
end;
procedure TSrchForm.FormCreate(Sender: TObject);
{use the 'tag' attribute of the main form to ascertain whether or not we
are accessing the form for the first time, and set various variables...}
begin
if (SrchForm.Tag = 0) then
begin
{it's very easy to forget to 'create' an instance of a
StringList object -which is what we do in the next line...}
fileList := TStringList.Create; {xxxDODGY???}
fileList.Clear;
end;
SrchForm.Tag := SrchForm.Tag + 1;
defaultAndsearchWidth := 10;
ANDsearchWidth := defaultAndsearchWidth;
{the minus 2 below is a safety measure...}
SrchForm.PatternEdit.MaxLength := (maxLineLen - 2);
{next line shouldn't be necessary but just to be on the safe side...}
searchType := 'DEFAULT_SEARCH';
caseInSensitive := False;
{the set below is a complete set of visible chars, BUT for our purposes
we must EXCLUDE the '+' character (Chr(44), to facilitate 'AND' searches)
so really the set below becomes the VisibleCharsMinusPlusSign set...}
VisibleChars := [Chr(33)..Chr(42),Chr(44)..Chr(126),Chr(145)..Chr(151),Chr(161)..Chr(171),
Chr(173)..Chr(174),Chr(176)..Chr(181),Chr(183)..Chr(255)];
{now use .INI file information to position things as they were when last used}
ReadINIFile;
end;
procedure TSrchForm.FormClose(Sender: TObject; var Action: TCloseAction);
begin
WriteIniFile;
Close;
Application.Terminate;
end;
procedure TSrchForm.CloseButtonClick(Sender: TObject);
{we DON'T want to close down this form completely or free up memory used for
our 'fileList' stringList. Better to HIDE the form and free up that memory in
a FormDestroy event handler...}
begin
SrchForm.Hide;
end;
procedure TSrchForm.FormDestroy(Sender: TObject);
begin
{free up the memory used by the file list for other applications...}
fileList.Free;
end;
function TSrchForm.CreateBuffer: Boolean;
{create a maxBufferSize buffer, but don't do anything with it...}
var
MemoryStatus: Boolean;
begin
MemoryStatus := True;
try {to allocate memory}
getmem(Buffer, maxBufferSize)
except
MemoryStatus := False;
end;
{return True if there IS enough memory, return False if there isn't...}
Result := MemoryStatus;
end;
procedure TSrchForm.DestroyBuffer;
{free the memory that Buffer points to...}
begin
freemem(Buffer, maxBufferSize);
end;
procedure TSrchForm.InitSkipArray;
{set up a 'skip' array for use in the search algorithm whereby
each character in the search pattern has a number associated with
it telling us how far we should move to the left in the source file
if we find a sourcefile character that is (somewhere) in the search
pattern... see elsewhere for psuedocode on the detail of the algorithm-
it's too esoteric to detail here...}
var
i, j: Integer;
begin
{set up the skip array so that all characters in the pattern have
a numeric value associated with them, which is THE NUMBER OF CHARACTERS
FROM THE END OF THE PATTERN. Where there is more than one instance of
a character, then use the RIGHTMOST. So that for 'weed', skip(d) = 0,
skip(e) = 1, and skip(w) = 3. For characters which do not appear in
the search pattern, then the corresponding value in the skip array is
the LENGTH OF THE SEARCH PATTERN. This enables us to jump forward
pattLen places in the source text where appropriate...}
for i := 0 to 255 do
begin
skip[i] := pattLen;
end;
for i := 0 to 255 do
begin
for j := 1 to (pattLen - 1) do
begin
if (searchPatt[j] = Chr(i)) then
begin
skip[i] := pattLen - j;
end; {end if}
end; {end for}
end; {end for}
end;
procedure TSrchForm.InitMInfoArray;
var
i: Integer;
begin
for i := 0 to maxFoundMatches do
begin
mInfoArray[i].fileNum := 0;
mInfoArray[i].filePos := 0;
mInfoArray[i].fileSize := 0;
end;
end;
procedure TSrchForm.InitialiseSearch;
{set the values of all manner of global variables, set up the 'pattern set'
and 'skip array' variables, display 'BUSY' caption and hourglass cursor, and
hide the 'search results' form...}
var
i: Integer;
begin
numMatches := 0;
currFileNum := 0;
stopSearch := False;
InitMInfoArray;
{in next two lines we determine the amount of text both before and
after a match to extract (along with the match sequence itself) and
by this means we extract a consistent total size of 'chunk'...}
halfaChunk := ((chunkLen - pattLen) div 2);
if (halfaChunk < 0) then halfaChunk := 10;
{if we've already done a search then ensure our Memo form is hidden-
this ensures that the user will not have to sit and wait while the program
writes to a displayed Memo (which would slow everything down)...}
ResultsForm.ClearMemos;
ResultsForm.Hide;
{now convert the search pattern to uppercase IF caseInSensitive is True...}
if (caseInsensitive = True) then
begin
searchPatt := AnsiUpperCase(searchPatt);
if (searchType = 'AND_SEARCH') then searchPattTwo := AnsiUpperCase(searchPattTwo);
end;
{create a 'skip array'...}
InitSkipArray;
{do stuff to show that the program IS actually running like shit off a stick}
SrchForm.Caption := ' Search Engine BUSY...';
Cursor := crHourglass;
end;
procedure TSrchForm.ProcessInputText;
{find out whether a search is a 'default' search (no '+' or '*' or '?'
characters in the input string) or whether the search will be an 'AND'
search, a 'wildcard' search, or a 'Qmark' search...}
{NOTE THAT WE DON'T FULLY VALIDATE USER INPUT HERE, and deliberately so,
since we want to give users the option of looking for whatever bizarre
sequence of characters takes their fancy. If input is off-the-wall-bizarre
then a search will simply find no matches -no problem, but we DO look for
more than one '+' character, and more than one '*' character, as such input
WILL be considered invalid. Similarly, there must only be one '+', one '*',
or several '?' characters in the input string...}
var
tempArray: array[0..maxInputLen] of Char;
i, inputLen, {asteriskCount, qMarkCount,} plusCount: Integer;
plusInputOK: Boolean;
begin
searchType := 'DEFAULT_SEARCH';
plusCount := 0;
{asteriskCount := 0;
qMarkCount := 0;}
searchPatt := patternEdit.Text;
inputLen := Length(searchPatt);
{if (inputLen = 0) then
begin
searchType := 'INVALID_SEARCH';
end;}
{if the inputted search string is longer than maxInputLen characters,
reduce it's length TO maxInputLen characters...}
if (inputLen >= maxInputLen) then
begin
for i := maxInputLen to (maxStringLen - 1) do
begin
searchPatt[i] := #0; {#0 is another way of saying Chr(0) aka NULL}
end;
end;
{copy the search pattern into an array...}
if (inputLen > 0) then
begin
for i := 1 to inputLen do
begin
tempArray[i - 1] := searchPatt[i];
end;
tempArray[i - 1] := #0;
end;
{and check out some dodgy characters in the search string...}
for i := 0 to (inputLen - 1) do
begin
if (tempArray[i] = '+') then
begin
searchType := 'AND_SEARCH';
Inc(plusCount);
end;
{IF at some future date code is amended to cater for 'wildcard' and
'question mark' searches, then use the commented-out lines below...}
{if (tempArray[i] = '*') then begin
searchType := 'WILD_SEARCH';
Inc(asteriskCount); end;
if (tempArray[i] = '?') then begin
searchType := 'QMARK_SEARCH';
Inc(qMarkCount); end;}
end;
if (plusCount > 1) then searchType := 'INVALID_SEARCH';
{if (asteriskCount > 1) then searchType := 'INVALID_SEARCH';
if ((plusCount > 0) and (asteriskCount > 0)) then searchType := 'INVALID_SEARCH';
if ((asteriskCount > 0) and (qMarkCount > 0)) then searchType := 'INVALID_SEARCH';
if ((qMarkCount > 0) and (plusCount > 0)) then searchType := 'INVALID_SEARCH';}
{I know you can do the above with hands and oars but doing it like this may
be more readable. If the search type is an 'AND search', first do one last
bit of input validation, and then parse out the two halves of the text...}
if (searchType = 'AND_SEARCH') then
begin
plusInputOK := CheckPlusInputOK;
end;
if (searchType = 'AND_SEARCH') then
begin
if (plusInputOk = True) then SeparateANDInput
else searchType := 'INVALID_SEARCH';
end;
if (searchType = 'DEFAULT_SEARCH') then pattLen := Length(searchPatt);
if (searchType = 'INVALID_SEARCH') then
begin
{searchTextOK := False;}
Application.MessageBox('Please check out Help for info on valid input', 'Invalid input', mb_OK);
end;
end;
function TSrchForm.CheckPlusInputOK: Boolean;
{this is a bit fiddly, but it has to be done! -Basically, we need to check
whether there ARE visible characters to the left and to the right of the
'+' character in the input string. Because if there aren't, we can't split
the string into two substrings properly...}
var
i, plusPos, inputLen: Integer;
visibleNLeftOK, visibleNRightOK: Boolean;
begin
visibleNLeftOK := False;
visibleNRightOK := False;
plusPos := Pos('+', searchPatt);
inputLen := Length(searchPatt);
if ((plusPos = 1) or (plusPos = inputLen)) then
begin
Result := False;
end
else
begin
for i := 1 to plusPos do
begin
if (searchPatt[i] in VisibleChars) then visibleNLeftOK := True;
end;
for i := plusPos to inputLen do
begin
if (searchPatt[i] in VisibleChars) then visibleNRightOK := True;
end;
if ((visibleNLeftOK = True) and (visibleNRightOK = True)) then Result := True
else Result := False;
end;
end;
procedure TSrchForm.SeparateANDInput;
{Delphi can be funny about copying characters from string to string on a
character-by-character basis so that's we we use a tempArray here...}
var
i, j, k, plusPos, inputLen: Integer;
tempArray: array[0..maxInputLen] of Char;
tempArrayOne: array[0..maxInputLen] of Char;
tempArrayTwo: array[0..maxInputLen] of Char;
begin
plusPos := Pos('+', searchPatt);
inputLen := Length(searchPatt);
j := 0;
{copy all characters from (and including) the '+' into tempArray...}
for i := plusPos to inputLen do
begin
tempArray[j] := searchPatt[i];
Inc(j);
end;
tempArray[j] := #0;
{copy all characters from UP TO the '+' into tempArrayOne...}
for i := 1 to (plusPos - 1) do
begin
tempArrayOne[i - 1] := searchPatt[i];
end;
tempArrayOne[i - 1] := #0;
{at this point we know the position of the '+' in searchPatt, and we have
all characters past (and including) the '+' copied into tempArray, and all
characters up the '+' copied into tempArrayOne, BUT we need to take account
of the fact that users might type in 'nature+man' for instance, or they
might type in 'nature + man', so we need to fiddle- firstly, go BACKWARDS
from plusPos in tempArrayOne, overwriting any non-visibles, including
spaces, with a NULL until we come to a VISIBLE character...}
i := (plusPos - 1);
{first draft of this used the IsCharAlphaNumeric function but that's not
such a good idea as it doesn't take account of user input that purposely
includes punctuation characters (etc?)...}
while (i >= 0) do
begin
Dec(i);
if (i = 0) then break;
if (not(tempArrayOne[i] in VisibleChars)) then
begin
tempArrayOne[i] := #0;
end
else break;
end;
{and copy this array back into the searchPatt variable...}
searchPatt := String(tempArrayOne);
{next, go FORWARD in tempArray to find the position of the
first visible character AFTER the '+'...}
for i := 1 to maxInputLen do
begin
if (tempArray[i] in VisibleChars) then break;
end;
{now copy characters from the first visible character (at index i)...}
k := 0;
for j := i to maxInputLen do
begin
tempArrayTwo[k] := tempArray[j];
if (tempArray[j] = #0) then break;
Inc(k);
end;
tempArrayTwo[k] := #0;
searchPattTwo := String(tempArrayTwo);
{at this point we have extracted two different string from what WAS the
searchPatt variable, so not only do we have a NEW and shorter searchPatt
but we also have a searchPattTwo as well, and we need to keep track of
the lengths of BOTH of these...}
pattLen := Length(searchPatt);
pattLenTwo := Length(searchPattTwo);
end;
procedure TSrchForm.SearchButtonClick(Sender: TObject);
{here we do various 'pre-search' tasks- checking that everything's OK before
we do the search: we check that there IS text in the 'PatternEdit' box, and
we check that we DO have at least one filename already (for the 'search' aka
'source' files) -next we check that we have enough memory for the buffer. If
we fail in any of these tasks we display an error message to the user. But if
things are OK so far, then we call InitialiseSearch to do various things such
as setting up the 'skip' array. Finally we call LoadSourceFile to load a file
into the buffer- the search routine itself being called from LoadSourceFile...}
var
searchTextOK, fileNameOK, memoryOK: Boolean;
fileRdStatus, currFileName, tempString: String;
i, numFiles: Integer;
exeFile: Boolean;
begin
searchTextOK := True;
fileNameOK := True;
memoryOK := True;
fileRdStatus := '';
exeFile := False;
ProcessInputText;
if (searchPatt = '') then
begin
searchTextOK := False;
Application.MessageBox('Please provide a search pattern in the input box provided', 'No search pattern', mb_OK);
end;
numFiles := fileList.Count;
if (numFiles = 0) then
begin
fileNameOK := False;
Application.MessageBox('Please choose file(s) to search in using the Search Domain button', 'No files specified', mb_OK);
end;
if (searchType = 'INVALID_SEARCH') then searchTextOK := False;
if ((searchTextOK) and (fileNameOK))then
begin
memoryOK := CreateBuffer;
if (memoryOK = False) then Application.MessageBox('The program does not have enough memory to play with -free up some system resources if possible', 'Not enough memory', mb_OK);
end;
if ((searchTextOK) and (fileNameOK) and (memoryOK)) then
begin
InitialiseSearch;
{the 'minus 1' next line is v. important, as the filelist is zero-based...}
for i := 0 to (numFiles - 1) do
begin
{go through the list of source files in sequence, loading each of them
into memory as we call LoadSourceFile -note that the SEARCH routine
itself is called from within the depths of the LoadSourceFile repeat
loop...}
currFileName := fileList[i];
{in line below, note that fileList indexing starts at 0 but we NEED
to start at 1 in the mInfoArray (below) which is why we make sure we
add 1 to currFileNum here...}
currFileNum := i + 1;
currFileSize := 0;
tempString := ExtractFileExt(currFileName);
if (tempString = '.exe') then exeFile := True;
if not(exeFile = True) then
begin
fileRdStatus := LoadSourceFile(currFileName);
{add file size to each entry in the mInfoArray
for the current source file...}
AddFileSize(i, currFileSize);
if (not(fileRdStatus = '')) then
begin
tempString := 'Cannot read ' + uppercase(currFileName) + '.' + #13 + fileRdStatus + '.';
Application.MessageBox(PChar(tempString), 'Error reading file', mb_OK);
end;
end; {if not exe file...}
end;
DestroyBuffer;
EndSearch;
end;
end;
procedure TSrchForm.AddFileSize(fileNumber: Integer; fileSize: Integer);
{add information on the size of the current file to the 'match info array'
making sure we only add this information to entries pertaining to the current
file...}
var
index: Integer;
begin
{add 1 to the fileNumber since although in the file list the first file
is numbered 0, we number them starting from 1 in the mInfoArray, OK...}
fileNumber := fileNumber + 1;
{firstly step through the array until we find the first entry for the
current file- note that each time we enter this procedure the value of
fileNumber will be the number of the last set of entries in the mInfoArray...}
index := 0;
while (index < maxFoundMatches) do
begin
if (mInfoArray[index].fileNum = fileNumber) then break;
Inc(index);
end;
{then use the value of the index variable to tell us where to START putting
values into the array. The presence of a 0 in the fileNum field tells us
when to STOP...}
while (index < maxFoundMatches) do
begin
if (mInfoArray[index].fileNum = 0) then break;
if (not(mInfoArray[index].fileNum = fileNumber)) then break;
mInfoArray[index].fileSize := currFileSize;
Inc(index);
end;
end;
function TSrchForm.LoadSourceFile(srcFileName: String): String;
{read a potentially big source text file in maxBufferSize
chunks, and call the Search() routine each time we load up
a new buffer-full of data...}
var
{declare the source file to be an UNTYPED file so that we
are then able to use Seek() and BlockRead() -which we could
not do if we opened the file as a 'text' file...}
srcFile: file;
readStatus: String;
bytesRead, blocksRead, i: Integer;
begin
readStatus := '';
AssignFile(srcFile, srcFileName);
try {to open source file}
reset(srcFile,1);
try {putting source file data into memory ie into the buffer}
repeat
bytesRead := 0;
blocksRead := 0;
{next line is very important (and so simple in Delphi!) -basically
it allows the program to be aware of the user pressing a key to
interrupt a search, if they HAVE pressed a key. Basically the
ProcessMessages() method interrupts the execution of the program
so that Windows can respond to events -the looked-for event in
this case being a user keypress... CURRENTLY HOWEVER IF IT'S THE
CASE THAT AT LEAST ONE SEARCH HAS ALREADY BEEN DONE THEN THE PROGRAM
IS QUICK TO PICK UP ON THE KEYPRESS, AND INTERRUPT THE SEARCH. IF ON
THE OTHER HAND IT'S THE FIRST SEARCH, THEN IT'S SLOW TO REACT -this
down to the intricacies of the Windows Message Loop -see also
SearchButtonKeyDown event handler below...}
Application.ProcessMessages;
if (stopSearch = False) then
begin
blockread(srcFile, Buffer^, sizeof(Buffer^), bytesRead);
Inc(blocksRead);
{note that we don't re-initialise the buffer each time we use
it- we don't need to, and doing that would slow things down.
However, on the final pass (where we grab a chunk of data that
is almost certain to be less than a 'bufferfull') the REST of
the buffer then will still have data in from the previous
blockread, so we must erase that, but, if we also add just one space
character to existing text this stops the EditLine procedure
truncating the last line of text if the last search pattern
character matches the last file character -anoraksville, man...}
if (bytesRead < maxBufferSize) then
begin
Buffer^[bytesRead + 1] := ' ';
for i := (bytesRead + 2) to maxBufferSize do
begin
{note the syntax below- the Buffer variable is a pointer type,
so to reference the character array to which it points, we use
the carat after the (pointer) variable name, okey dokey...}
Buffer^[i] := #0;
end;
end;
OverwriteDuffBufferChars;
if (blocksRead > 1) then
begin
{if Buffer is full when we come to read data in, (as code here
iterates in a repeat loop) then skip backwards pattLen bytes
in the source file -this will ensure that we don't miss a
searched-for pattern existing on a 'blockread boundary'...}
seek(srcFile, filepos(srcFile)-pattLen);
end;
{insert code here to convert all chars to uppercase if necessary...
see BMH algorithm sourcecode DelphiOne/Temp/Nine dir...}
if (caseInSensitive = True) then CapitaliseBuffer;
{call the appropriate search routine to search the current buffer,
which exists as a 'global' variable (so [of course] we don't need
to hand the buffer to the search routine as a parameter)...}
if (searchType = 'DEFAULT_SEARCH') then DefaultSearch;
if (searchType = 'AND_SEARCH') then ANDSearch;
if (blocksRead > 1) then
begin
{each time we read a block, add the last 'bytesRead' value to the
currFileSize variable so that we can i) find the true file pos for
a match by adding the search routine (current buffer) pointer to
currFileSize, and ii) when we finish reading a file the value of
currFileSize is then the TOTAL file, so we can put i) and ii) values
into the mInfoArray, BUT (are you listening carefully?) BECAUSE we
backtrack in the sourcefile pattLen characters on every blockread
after the first one, sIdx does not then reflect the 'true'
filePosition, which will in fact be pattLen bytes LESS than sIdx
plus the sum of prior blockreads, so we must cater for this, below}
currFileSize := ((currFileSize + bytesRead) - pattLen);
end
else
begin
currFileSize := currFileSize + bytesRead;
end;
end;
until ((bytesRead < maxBufferSize) or (bytesRead = 0)
or (numMatches >= maxFoundMatches) or (stopSearch = True));
finally
closefile(srcFile)
end; {putting source file data into memory}
except
on E: EInOutError do
begin
readStatus := GetError(E.ErrorCode);
end
end; {trying to open source file}
{if there hasn't been an error reading the source file then
return the empty string in readStatus. If there HAS been an
error then return an error message string...}
Result := readStatus;
end;
function TSrchForm.GetError (const ErrorCode: integer): string;
{return a string pertaining to the type of error. If IO-checking was off
we could check for errors by looking at IOResult, but in this program we
use an exception handler (in the file reading routine above) instead. The
strings listed below are taken from Borland's 'Object Pascal Language Guide'
for Delphi Version 1.0, pages 273-275...}
begin
case ErrorCode of
2: Result := 'File not found';
3: Result := 'Path not found';
4: Result := 'Too many open files';
5: Result := 'File access denied';
6: Result := 'Invalid file handle';
12: Result := 'Invalid file access code';
15: Result := 'Invalid drive';
100: Result := 'Disk read error';
101: Result := 'Disk write error';
102: Result := 'File not assigned';
103: Result := 'File not open';
else
Result := ''
end
end;
procedure TSrchForm.OverwriteDuffBufferChars;
{go right through the current buffer overwriting any characters we are sure
to consider undesirable with space characters. Doing this here, before we do
anything else, is good, since we are making sure that buffers don't have any
weird unprintable stuff in that might screw things up later on. So first
populate the allowableChars set variable with ALLOWABLE characters, which may
be either from the 'normal' ASCII range of 0 to 127 (from space to '~') and
which may also be from the 'extended' ASCII range of 128 to 255 (all the
'visibles' in that range). Note that while a routine such as this will be a
good idea in a program such as this, the definition of 'undesirable' is likely
to change from program to program depending on specific aims. Note that it
MAY be important that we ALLOW CR and LF chars here...}
type
TallowableChars = set of Char;
var
allowableChars: TallowableChars;
i: LongInt;
begin
allowableChars := [Chr(10), Chr(13), Chr(32)..Chr(126), Chr(145)..Chr(151),
Chr(161)..Chr(174), Chr(176)..Chr(181),Chr(183)..Chr(255)];
for i := 1 to maxBufferSize do
begin
if not(buffer[i] in allowableChars) then buffer[i] := ' ';
end;
end;
procedure TSrchForm.DefaultSearch;
{use a variation on the Boyer-Moore (aka 'mismatched character') algorithm
to search the current buffer for strings that match the user-input 'search
pattern' string. Where a match is found, extract chunkLen characters from
AROUND the 'match' position in the source file, and write them to a memo
component...}
var
sIdx, sIdxTmp, pIdx: Longint;
matchFound: Boolean;
begin
matchFound := True;
{sIdx is a 'sourceIndex' variable used to keep track of our position within
the source file. Note that initially we set it to pattLen, (remembering
that in the buffer the first byte has an index of 1) so that at first
we look for a match pattLen characters into the source text, NOT at the
beginning as one might at first expect...}
sIdx := pattLen;
{do main search loop -cleverer than it might appear at first glance..!}
repeat
matchFound := True;
sIdxTmp := sIdx;
for pIdx := pattLen downto 1 do
begin
if (buffer[sIdxTmp] = searchPatt[pIdx]) then
begin
Dec(sIdxTmp);
end
else
begin
matchFound := False;
break;
end;
end;
if (matchFound = True) then
begin
GetChunk(sIdxTmp);
mInfoArray[numMatches].fileNum := currFileNum;
mInfoArray[numMatches].filePos := currFileSize + sIdxTmp;
Inc(numMatches);
sIdx := sIdx + pattLen;
end
else
begin
sIdx := (sIdx + skip[ord(buffer[sIdx])]);
end;
until ((sIdx >= maxBufferSize) or (numMatches >= maxFoundMatches));
end;
procedure TSrchForm.ANDSearch;
{do a search where user input is in the form of TWO 'search pattern' variables.
Although it would be possible to adapt the Boyer-Moore algorithm such that an
amended BM technique uses the same method to search for both patterns, here
(for simplicity's sake) we use the Boyer-Moore method to search for the first
pattern, and then if we find a match, we do a 'brute-force' search backwards
and forwards from that position to look for an instance of the second pattern
(going backwards or forwards only ANDsearchWidth characters) -OK?...}
var
sIdx, sIdxTmp, pIdx, w, x, y, z, bruteIdx, bruteEndPos: Longint;
firstMatchFound, assumedMatch, secondMatchFound, bufEnd: Boolean;
tempPattArray: array[0..maxInputLen] of Char;
begin
firstMatchFound := True;
secondMatchFound := False;
assumedMatch := True;
bufEnd := False;
sIdx := pattLen;
{firstly copy the SECOND search pattern string into an array...}
for x := 1 to (pattLenTwo) do
begin
tempPattArray[x - 1] := searchPattTwo[x];
end;
tempPattArray[x - 1] := #0;
repeat
FirstMatchFound := True;
sIdxTmp := sIdx;
for pIdx := pattLen downto 1 do
begin
if (buffer[sIdxTmp] = searchPatt[pIdx]) then
begin
Dec(sIdxTmp);
end
else
begin
firstMatchFound := False;
break;
end;
end;
if (firstMatchFound = True) then
begin
{at this point we have found a match on the first pattern, with the
position of the first source file matched character held in sIdxTmp-
so firstly go back ANDsearchWidth places (while avoiding backtracking
past the start of the file/buffer)...}
if (sIdxTmp <= ANDsearchWidth) then bruteIdx := 1;
if (sIdxTmp >= ANDsearchWidth) then bruteIdx :=(sIdxTmp - ANDsearchWidth);
{now (assuming we don't go forward past the end of the file/buffer) we
simply look for a match against the second pattern by going forward
((2 x ANDsearchWidth) + pattLen) places. (Ignoring the complication
arising from going back less than ANDsearchWidth places from match pos
if we are near the start of the file, as that doesn't really matter)...}
bruteEndPos := (bruteIdx + (ANDsearchWidth * 2) + pattLen);
x := bruteIdx;
{if you didn't know, the brute-force search algorithm works like this:
i) from pos n in source file compare bytes one at a time against bytes
in searchPatt ii) if the search fails, then move source file position
(n) one place forward and do the same thing again...}
while ((x <= bruteEndPos) and (bufEnd = False) and (secondMatchFound = False)) do
begin
y := x;
z := 0;
while (assumedMatch = True) do
begin
if not(tempPattArray[z] = buffer[y]) then
assumedMatch := False
else
begin
Inc(z);
Inc(y);
end;
if (z = pattLenTwo) then secondMatchFound := True;
if (y >= maxBufferSize) then bufEnd := True;
end;
Inc(x);
assumedMatch := True;
end;
if (secondMatchFound = True) then
begin
{at this point we should (WILL!) have found a match on searchPattTwo...}
GetChunk(sIdxTmp);
mInfoArray[numMatches].fileNum := currFileNum;
mInfoArray[numMatches].filePos := currFileSize + sIdxTmp;
Inc(numMatches);
end;
secondMatchFound := False;
sIdx := sIdx + pattLen;
end
else
begin
sIdx := (sIdx + skip[ord(buffer[sIdx])]);
end;
until ((sIdx >= maxBufferSize) or (numMatches >= maxFoundMatches));
end;
procedure TSrchForm.CapitaliseBuffer;
{Perform our own DIY conversion routine. Careful examination of
characters in the range 0 to 255 reveals that there are two uninterrupted
sequences of lowercase characters that have uppercase equivalents (and vice
versa). One sequence is in the lower ('ASCII') range (ie 'a' to 'z') -the
other sequence is in the higher range (ie à to ı). Also, for both sequences
the uppercase equivalent of a character is 32 characters LESS than the
lowercase version. So, therefore, the algorithm will be: IF character in
range 97 to 122 inclusive, OR if character in range 224 to 253 inclusive,
then character = the character 32 characters 'behind'...}
var
i: Integer;
begin
for i := 1 to sizeof(Buffer^) do
begin
if (((Ord(Buffer^[i]) >= 97) and (Ord(Buffer^[i]) <= 122))) then
begin
Buffer^[i] := Chr(Ord(Buffer^[i]) - 32);
end;
if (((Ord(Buffer^[i]) >= 224) and (Ord(Buffer^[i]) <= 253))) then
begin
Buffer^[i] := Chr(Ord(Buffer^[i]) - 32);
end;
end;
{we might be scanning through NULLs at the end of the last
buffer but this doesn't matter...}
end;
procedure TSrchForm.GetChunk(bufPos: LongInt);
{extract a chunk of source file data around the position of the match and edit
that line, finally adding it to the the list of found match lines (extent as a
Memo component)...}
var
i: Integer;
srcChunk: array[0..chunkLen] of Char;
tempString: String;
firstSpacePos: LongInt;
firstSrcWord: Boolean;
begin
firstSrcWord := False;
{initialise both the 'chunk' array and 'tempString'...}
for i := 0 to (chunkLen - 1) do
begin
srcChunk[i] := #0;
end;
tempString := '';
{the following works around the fact that the EditLine routine WOULD remove
a match word if it was at the very first position in the source file unless
we add the space that we do here -note that IF bufPos is SOMEHWERE in the
middle of the first word (ie if the value of BufPos is less than the position
of the first space) then we DON'T handle that -attempting to do so creates
problems of it's own...}
if (bufPos = 0) then
begin
firstSrcWord := True;
end;
{extract text from the source file, going backwards HALF chunkLen
characters before the match, and HALF chunkLen characters after it,
placing characters into the srcChunk variable, which is a character
array (whose indexing therefore starts at 0)... note also the next
two lines to ensure that we don't try and backtrack past the START
of the buffer (which would surely cause havoc)...}
if (bufPos <= halfaChunk) then bufPos := 1;
if (bufPos >= halfaChunk) then bufPos := (bufPos - halfaChunk);
if (firstSrcWord = True) then
begin
srcChunk[0] := ' ';
i := 1;
end
else
begin
i := 0;
end;
while (i < chunkLen) do
begin
{avoid putting unnecc. line feed/return characters into memo lines...}
if ((buffer[bufPos] = #10) or (buffer[bufPos] = #13)) then
begin
{replace line feed/return characters with spaces...}
srcChunk[i] := ' ';
Inc(i);
end
else
begin
srcChunk[i] := buffer[bufPos];
Inc(i);
end;
{make sure we don't go past the end of srcChunk...}
if (i >= (chunkLen - 1)) then break;
Inc(bufPos);
{make sure we don't go past the end of the buffer...}
if (bufPos >= maxBufferSize) then break;
end; {while}
{terminate the srcChunk array with a NULL character...}
srcChunk[i] := #0;
tempString := EditLine(srcChunk, i);
ResultsForm.LinesMemo.Lines.Add(tempString);
end;
function TSrchForm.EditLine(tempLine: array of Char; i: Integer): String;
{this function takes as a parameter a char array which is a single line
(aka 'chunk') of source file text. The 'i' parameter is the length of the line.
At this point, however, we may have 'part-words' at the beginning and end of
the line, so this routine strips away these unwanted characters to produce
lines that start and end with whole words. The resulting 'edited' line is then
returned as a string...}
var
j, n: Integer;
tempArray: array[0..chunkLen] of Char;
tempString: String;
begin
{firstly go forward in the sourcefile 'chunk' until you get to a space...}
j := i;
i := 0;
while not(tempLine[i] = ' ') do
begin
Inc(i);
if (i >= j) then break;
end;
Inc(i);
n := 0;
{now put the whole of what remains into tempArray...}
while (i <= j) do
begin
tempArray[n] := tempLine[i];
Inc(i);
Inc(n);
end;
{and then backtrack from the END of temparray, until you get to a space,
placing a NULL character (#0) in the last 'non-space' position...}
while not(tempArray[n] = ' ') do
begin
Dec(n);
if (n <= 0) then break;
end;
tempArray[n] := #0;
tempString := string(tempArray);
Result := tempString;
end;
procedure TSrchForm.SearchButtonKeyDown(Sender: TObject; var Key: Word;
Shift: TShiftState);
{if the user has initiated a search (by clicking on the Search button
with the mouse) then the focus remains on the button as the search is
performed- SO, if the user wants to interrupt a search, then all we need
to do here is look for the ESC key being pressed: if ESC has been pressed,
then we set a global variable which the LoadSourceFile() routine keeps
tabs on from time to time...}
begin
{code here uses the 'virtual key code' for the ESC key...}
if (Key = VK_ESCAPE) then stopSearch := True;
end;
procedure TSrchForm.SDomainButtonClick(Sender: TObject);
begin
SDForm.SetSearchDomain;
{next line is necessary to ensure that we don't continually add to the
fileList- it's OK to clear it since when we call GetFilesNames (as below)
we can be sure that the fileList will be updated there with a CURRENT list
of 'search domain' filenames...}
fileList.Clear;
SDForm.GetFileNames;
{the two (re)-initialisations below preclude the possibility of the program
confusing file 1 in a previously-chosen 'page' window with file 1 of a
newly-defined search DOMAIN...}
ResultsForm.currLoadFile := 0;
ResultsForm.prevLoadFile := 0;
InitMInfoArray;
end;
procedure TSrchForm.EndSearch;
var
i, numMatches: Integer;
tempString: String;
begin
numMatches := ResultsForm.ProcessMInfoMemo;
if (numMatches = 0) then
begin
if (searchType = 'AND_SEARCH') then tempString := 'No matches found for ' + searchPatt + ' and ' + searchPattTwo
else tempString := 'No matches found for ' + searchPatt;
Application.MessageBox(PChar(tempString), 'Search results', mb_OK);
end
else
begin
if (caseInSensitive = True) then
begin
for i := 0 to ResultsForm.LinesMemo.Lines.Count do
begin
{having all uppercase text in the memo doesn't look
too hot so lowercase everything instead...}
ResultsForm.LinesMemo.Lines[i] := AnsiLowerCase(ResultsForm.LinesMemo.Lines[i]);
end;
end;
ResultsForm.CaptionResultsForm;
ResultsForm.Show;
end;
SrchForm.Caption := ' Search Engine SEARCH COMPLETED';
Cursor := crDefault;
end;
procedure TSrchForm.HelpButtonClick(Sender: TObject);
{run the Helpfile for the application -as it stands, the Helpfile
must be in the STARTUP directory along with the associated contents
file, so both Search.hlp and Search.cnt must be present...}
var
exeFileName, pathToExe, fileName: String;
begin
{because changing the directory in the file List box changes the overall
'current directory' such that the program won't find the Help file if
we're not careful, we need to keep track of the path to the .exe (and
therefore the .hlp) WHEN THE PROGRAM FIRST RUNS...}
exeFileName := Application.ExeName;
pathToExe := ExtractFilePath(exeFileName);
fileName := pathToExe + helpFileName;
Application.HelpFile := fileName;
Application.HelpCommand(HELP_CONTENTS, 0);
end;
procedure TSrchForm.ReadINIFile;
{read this module's INI file, which will be in the Windows directory...}
var
thisModuleIni: TIniFile;
begin
thisModuleIni := TIniFile.Create(iniFileName);
with thisModuleIni do
begin
try
SrchForm.Left := ReadInteger('SrchFormPos', 'Left', 0);
SrchForm.Top := ReadInteger('SrchFormPos', 'Top', 0);
{SrchForm.Width := ReadInteger('SrchFormPos', 'Width', 0);
SrchForm.Height := ReadInteger('SrchFormPos', 'Height', 0);}
finally
{free up memory used in 'creating' the INI file...}
Free;
end;
end;
end;
procedure TSrchForm.WriteINIFile;
var
thisModuleIni: TIniFile;
begin
thisModuleIni := TIniFile.Create(iniFileName);
with thisModuleIni do
begin
try
WriteInteger('SrchFormPos', 'Left', SrchForm.Left);
WriteInteger('SrchFormPos', 'Top', SrchForm.Top);
{WriteInteger('SrchFormPos', 'Width', SrchForm.Width);
WriteInteger('SrchFormPos', 'Height', SrchForm.Height);}
finally
{free up memory used in creating the INI file (again)...}
Free;
end;
end;
end;
end.