Title: Parsing URIs
Question: Have you ever needed to break apart a URI to get its elements?
Answer:
NOTE: if you have an Internet library, you already have URI parsing functions. This article may still be useful as an alternative approach, as a check on how a parsing algorithm works, as a way to spend an article credit, as an excuse to flame somebody else's code, etc.
In Indy (Internet Direct), see the TIdURI class in the IdURI unit.
In TurboPower Internet Professional, see the IpParseURL function in the IpMisc unit.
I don't know about ICS, though.
A URI is the form in which an Internet address is written; most protocols use one. A URI has the following syntax:
[PROTOCOL + ://]HOST[:PORT][SUBDIRS][DOCUMENT][?+PARAMS][#+BOOKMARK]
Here is the approach I came up with for URI parsing; the code is commented:
{ Returns True when ACharacter is one of the decimal digits '0'..'9'.
  The original comparison operator was lost ("Pos(...) 0"); a direct range
  check expresses the same test without a string search. }
function IsNumber(ACharacter: Char): Boolean;
begin
  Result := (ACharacter >= '0') and (ACharacter <= '9');
end;
{ Splits a URL query string into its '&'-separated "name=value" pairs.
  EG. http://search.yahoo.com/bin/search?p=britney+spears
      (Params = 'p=britney+spears')  ->  Dest[0] = 'p=britney+spears'

  Params : the text after the '?' of a URL, without the '?' itself.
  Dest   : receives one entry per pair; it is cleared first. When Dest is
           nil the call does nothing.

  Empty segments (an empty Params, or a leading, trailing or doubled '&')
  are skipped so that Dest never contains blank entries.
  To access names and values use TStrings.Names and TStrings.Values. }
procedure SplitParams(const Params: String; Dest: TStrings);
var
  SepPos: Integer;
  Rest, Piece: String;
begin
  if not Assigned(Dest) then
    Exit;
  Dest.Clear;
  Rest := Params;
  SepPos := Pos('&', Rest);
  while SepPos > 0 do
  begin
    Piece := Copy(Rest, 1, SepPos - 1);
    if Piece <> '' then
      Dest.Add(Piece);
    // Drop the consumed pair together with its '&' separator
    Delete(Rest, 1, SepPos);
    SepPos := Pos('&', Rest);
  end;
  // Whatever is left after the last '&' is the final pair
  if Rest <> '' then
    Dest.Add(Rest);
end;
{ Locates the query part of a URL and splits it into parameters.

  URL      : the address to inspect.
  Params   : receives the "name=value" pairs via SplitParams; may be nil
             when only the position is wanted.
  QueryPos : set to the index of the first '?' in URL, or 0 when the URL
             has no query.

  A '#bookmark' that follows the query is not part of the parameters and
  is cut off before splitting. When there is no query, Params (if given)
  is emptied so that results of an earlier call do not linger. }
procedure ProcessQuery(const URL: String; Params: TStrings; var QueryPos: Integer);
var
  Query: String;
  HashPos: Integer;
begin
  QueryPos := Pos('?', URL);
  if QueryPos > 0 then
  begin
    // Everything after the '?' belongs to the query ...
    Query := Copy(URL, QueryPos + 1, Length(URL) - QueryPos);
    // ... up to a bookmark, if one follows
    HashPos := Pos('#', Query);
    if HashPos > 0 then
      SetLength(Query, HashPos - 1);
    SplitParams(Query, Params);
  end
  else if Assigned(Params) then
    Params.Clear;
end;
{ Returns the document name of a URL.

  URL    : the address to inspect.
  DocPos : set to the index in URL where the document name starts. When the
           URL has no document it is set to the index just past the
           host/path part (the position of the '?' or '#', or Length(URL)+1),
           so Copy(URL, 1, DocPos - 1) always yields everything that
           precedes the document.

  Special cases:
   1) http://www.hello.com or www.hello.com : there is no document (the
      server will pick one when queried), so '' is returned.
   2) http://www.hi.com/index.php or
      http://www.hi.com/index.php#thisis or
      http://www.hi.com/index.php#thisis?howareyou=hacker :
      the document is index.php; the query and the bookmark are ignored.
   3) http://www.hi.com/dir/ : the path ends in '/', so '' is returned. }
function QueryDocument(const URL: String; var DocPos: Integer): String;
var
  QueryPos, HashPos, SchemePos, HostStart, EndPos, i: Integer;
begin
  Result := '';

  // The document ends where the query or the bookmark starts,
  // whichever comes first
  EndPos := Length(URL);
  QueryPos := Pos('?', URL);
  if QueryPos > 0 then
    EndPos := QueryPos - 1;
  HashPos := Pos('#', URL);
  if (HashPos > 0) and (HashPos - 1 < EndPos) then
    EndPos := HashPos - 1;

  // The slashes of "://" are not path separators. Only accept "://" as the
  // protocol separator when it holds the first '/' of the URL and lies
  // before the query/bookmark.
  HostStart := 1;
  SchemePos := Pos('://', URL);
  if (SchemePos > 0) and (SchemePos < EndPos) and
     (Pos('/', URL) = SchemePos + 1) then
    HostStart := SchemePos + 3;

  // Walk back to the last path separator; stopping at HostStart keeps the
  // index inside the string
  i := EndPos;
  while (i >= HostStart) and (URL[i] <> '/') do
    Dec(i);

  if i < HostStart then
  begin
    // No path at all: URL of the form www.hello.com, hence no document
    DocPos := EndPos + 1;
    Exit;
  end;

  DocPos := i + 1;
  Result := Copy(URL, DocPos, EndPos - i);
end;
{ Returns the port number given in a URL, or 0 when there is none.
  EG. www.hello.com:8000                  -> 8000
      http://www.hello.com:8000/index.htm -> 8000
      http://www.hello.com/index.htm      -> 0

  Only the host part (after an optional "protocol://" and before the first
  '/', '?' or '#') is searched, so a ':' further along the URL is never
  mistaken for a port separator. Text after the ':' that is not a
  non-negative number also yields 0. }
function QueryPort(const URL: String): Integer;
var
  Authority, Digits: String;
  SchemePos, EndIdx, ColonIdx, ErrCode, i: Integer;
begin
  Result := 0;
  Authority := URL;

  // Strip "protocol://"; it is only a protocol separator when it holds the
  // first '/' of the URL
  SchemePos := Pos('://', Authority);
  if (SchemePos > 0) and (Pos('/', Authority) = SchemePos + 1) then
    Delete(Authority, 1, SchemePos + 2);

  // The host part ends at the first path, query or bookmark delimiter
  EndIdx := Length(Authority);
  for i := 1 to Length(Authority) do
    if (Authority[i] = '/') or (Authority[i] = '?') or (Authority[i] = '#') then
    begin
      EndIdx := i - 1;
      Break;
    end;

  // The port follows the last ':' of the host part
  ColonIdx := 0;
  for i := EndIdx downto 1 do
    if Authority[i] = ':' then
    begin
      ColonIdx := i;
      Break;
    end;
  if ColonIdx = 0 then
    Exit; // no port number, so exit gracefully with 0

  Digits := Copy(Authority, ColonIdx + 1, EndIdx - ColonIdx);
  Val(Digits, Result, ErrCode);
  // ErrCode <> 0 means Digits was not a valid number
  if (ErrCode <> 0) or (Result < 0) then
    Result := 0;
end;
{ Returns the bookmark (the text after '#') of a URL, or '' when the URL
  has none.
  EG. www.hello.com/index.htm#notwelcome -> 'notwelcome'

  The bookmark runs from the first '#' to the end of the URL, or up to a
  '?' when a query follows the bookmark
  (index.php#thisis?howareyou=hacker -> 'thisis'). A '?' that appears
  before the '#' does not shorten the bookmark. }
function QueryBookmark(const URL: String): String;
var
  HashPos, EndPos, i: Integer;
begin
  Result := '';
  HashPos := Pos('#', URL);
  if HashPos = 0 then
    Exit;

  // Only a '?' located after the '#' terminates the bookmark
  EndPos := Length(URL);
  for i := HashPos + 1 to Length(URL) do
    if URL[i] = '?' then
    begin
      EndPos := i - 1;
      Break;
    end;

  Result := Copy(URL, HashPos + 1, EndPos - HashPos);
end;
{ Breaks a URI into its elements using the Query* helpers.

  URI         : the address to parse.
  Host        : receives everything that precedes the document name
                (protocol, host, port and sub-directories).
  Document    : receives the document name, '' when there is none.
  BookMark    : receives the text after '#', '' when there is none.
  Port        : receives the port number as text (the IntToStr of whatever
                QueryPort returns).
  QueryParams : receives the "name=value" pairs of the query; may be nil. }
procedure ParseURI(const URI: String; var Host, Document, BookMark, Port: String; QueryParams: TStrings);
var
  QueryPos, DocPos: Integer;
begin
  // QueryPos is only needed to satisfy the var parameter
  ProcessQuery(URI, QueryParams, QueryPos);
  BookMark := QueryBookmark(URI);
  Port := IntToStr(QueryPort(URI));
  // Preset DocPos so that Host falls back to the whole URI should
  // QueryDocument leave the var parameter untouched
  DocPos := Length(URI) + 1;
  Document := QueryDocument(URI, DocPos);
  Host := Copy(URI, 1, DocPos - 1);
end;
By the way, this algorithm can be extended and expanded, and I am working on it. If you have any suggestions, comments, or criticism, drop a comment.
KNOWN BUGS:
* According to some sources, the Pos function is not as fast as it could be, and it cannot be used to search a string backwards (from the end).
* Yes, I know this isn't the best algorithm for URI parsing; as of this writing I am working on enhancements, extensions, etc.