Title: How to parse an XML file
For example if we have a text
="xxx"
="xxx1"/
="xxx2"/
="xxx3"/
="xxx4"/
/ul
we will have one root object (named "UL") in wich we will have 4 children
(named "LI" with different sets of properties - from "NAME"="xxx1" to "NAME"="xxx4")
This is not a trivial task - so we will make a unit to solve this...
I will try to comment some here...
if you have any comments for this unit - write to me: sunworx@mail.ru; yz@infoteh.ru
*)
unit YZXMLParser;
interface
uses
SysUtils, ComCtrls;
type
THashElement = record
Key, Value: string;
end;
type
THashElementArr = array of THashElement;
// here we declare a THash class to use in our parser
// The concept of THash is to retreive named values from an array
// Hash is an array where index is a string (example V[Key]=value,
// whehe Key and Value are of type string)
// The main purpose of this class is to rerurn a value of a String-named key
//(example: s:=hash['someValue'])
// the description of a hash element we use
type
THash = class(TObject)
private
Arr: THashElementArr;
function GetValue(Key: string): string;
procedure SetValue(Key: string; const VValue: string);
function GetKeys: StrArr;
function GetValues: StrArr;
function GetCount: Integer;
function Getempty: Boolean;
public
property Value[Key: string]: string read GetValue write SetValue; default;
property Values: StrArr read GetValues;
property Keys: StrArr read GetKeys;
property Count: Integer read GetCount;
property Empty: Boolean read Getempty;
procedure Clear;
constructor Create;
destructor Destroy; override;
end;
TYZHash = THash;
type
// Here we declare some definitions for our parser to know what
// identifier we would receive next in our text
// these values will be used in the result of WhatNext() function which will scan text for keys
TYZXMLMarker = (xmlOpenTag, xmlCloseTagShort, xmlCloseTag, xmlCloseTagLong,
xmlEOF, xmlIdentifier, xmlunknown); / *
Because we use recursive definition of our class(as TreeView, where we declare children of
the same type in opur type
declaration) we must use forward declaration
* /
// The definition of a TAG class
TYZXMLTag = class;
TYZXMLTags = array of TYZXMLTag;
TYZXMLTag = class(TObject)
private
FData: TYZHash;
FParent: TYZXMLTag;
FName: string;
function GetValue(AName: string): string;
procedure SetName(const Value: string);
procedure SetValue(AName: string; const Value: string);
function GetCount: Integer;
function GetValueNames: strarr;
public
Children: TYZXMLTags; // these are our child nodes
Text: string;
property Name: string read FName write SetName; // name of a tag
property Values[AName: string]: string read GetValue write SetValue;
default; // values of properties of a tag (hash values)
property ValueNames: strarr read GetValueNames;
// array of strings returniong names of all props of this tag
property Count: Integer read GetCount;
// a count of children of a tag (if this tag is a container)
function SkipSpaces(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): Char;
// internal. for skip spaces (also CR or LF or other non-text chars) while parsing text
function ParseValue(var AData: string; var APos: Integer): Boolean;
// parse value (calling when found a parameter of a tag)
function ParseName(var AData: string; var APos: Integer): Boolean;
// parse key of parameter in a tag
// these two procs used to parse any text found while parsing XML
function ParseString(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): string;
function ParseQuotedString(var AData: string; var APos: Integer;
QIndef: Char = '"'): string;
// returnes the type of next identifier in XML
function WhatNext(var AData: string; var APos: Integer;
var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;
// This is a main procedure of our class - AData is a string,
// containing all XML data (you can use TMemo.Text, for example, as a parameter of AData)
function ParseXML(var AData: string; var APos: Integer): Boolean;
// This function returnes a text string, built based on data, stored in an object.
function GenerateXML(var AData: string; ATab: string = ''): Boolean;
// returnes char from string at specified pos (#0 if not in range)
function CharAt(var S: string; APos: Integer): Char;
function TagNameExists(AName: string): Boolean;
// Adds a child to children array of a current tag
function AddChild: TYZXMLTag;
// Initializes current tag and deletes all existing children
procedure Clear; virtual;
constructor Create(AParent: TYZXMLTag); virtual;
destructor Destroy; virtual;
end;
type
TYZXMLParser = class(TYZXMLTag)
private
Header: TYZHash;
procedure _BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode; ATag: TYZXMLTag);
public
property HeaderValues: TYZHash read Header;
procedure BuildTreeView(ATreeView: TTreeView);
function Parse(AData: string): Boolean;
function Generate(var AData: string): Boolean;
constructor Create;
destructor Destroy;
end;
implementation
//==============================================================================
{ TYZXMLTag }
function TYZXMLTag.AddChild: TYZXMLTag;
begin
setlength(children, Length(children) + 1);
Result := TYZXMLTag.Create(Self);
children[Length(children) - 1] := Result;
end;
//------------------------------------------------------------------------------
procedure TYZXMLTag.Clear;
var
i: Integer;
begin
for i := 0 to Count - 1 do if children[i] nil then Children[i].Destroy;
setlength(children, 0);
FData.Clear;
Text := '';
end;
//------------------------------------------------------------------------------
constructor TYZXMLTag.Create(AParent: TYZXMLTag);
begin
inherited Create;
FData := TYZHash.Create;
FParent := AParent;
Clear;
end;
//------------------------------------------------------------------------------
destructor TYZXMLTag.Destroy;
begin
Clear;
FData.Destroy;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GetCount: Integer;
begin
Result := Length(children);
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GetValue(AName: string): string;
begin
Result := FData[AName];
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseName(var AData: string; var APos: Integer): Boolean;
begin
Result := False;
FName := ParseString(AData, APos);
if fname = '' then Exit;
Result := True;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseQuotedString(var AData: string; var APos: Integer;
QIndef: Char = '"'): string;
var
i: Integer;
skipnext: Boolean;
z: Char;
begin
Result := '';
if CharAt(AData, APos) QIndef then Exit;
i := apos;
skipnext := True;
repeat
if not skipnext then
begin
if charat(AData, I) = '\' then SkipNext := True
else
begin
z := charat(AData, I);
if (Z = QIndef) or (z = #0) then
begin
Result := Copy(AData, aPos + 1, I - APos - 1);
// result:=exch(result,'\','');
APos := I + 1;
Exit;
end;
end;
end
else
skipnext := False;
Inc(i);
until False;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseString(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): string;
const
extsym: string = '=;?*/';
var
nxt: Char;
x1, x2, i: Integer;
begin
Result := '';
nxt := SkipSpaces(AData, APos, RememberBreaks);
if nxt = #0 then Exit;
if (nxt = '"') or (nxt = '''') then
begin
Result := ParseQuotedString(AData, APos);
Exit;
end;
x1 := APos;
i := x1;
nxt := CharAt(AData, i);
while ((Ord(nxt) = 32) or (Pos(nxt, extsym) 0)) and (nxt #0) do
begin
Inc(i);
nxt := CharAt(AData, i);
end;
APos := i;
X1 := APos;
while (Ord(nxt) 32) and (Pos(nxt, extsym) = 0) do
begin
Inc(i);
nxt := CharAt(AData, i);
end;
x2 := i - x1;
Result := Copy(AData, x1, x2);
APos := i;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseValue(var AData: string; var APos: Integer): Boolean;
var
n, v: string;
i, x: Integer;
begin
Result := False;
n := parseString(AData, APos);
if n = '' then Exit;
if skipspaces(AData, APos) '=' then Exit;
Inc(apos);
V := parseString(AData, APos);
fdata[n] := dequote(v);
Result := True;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseXML(var AData: string; var APos: Integer): Boolean;
var
N: TYZXMLMarker;
nxt: Integer;
isLong: Boolean;
inTag: Boolean;
begin
isLong := False;
Result := False;
Clear;
if WhatNext(AData, APos, nxt) xmlOpenTag then Exit;
APos := nxt;
if WhatNext(AData, APos, nxt) xmlIdentifier then Exit;
Result := ParseName(AData, APos);
if not Result then Exit;
intag := True;
Result := False;
while True do
begin
N := WhatNext(AData, APos, nxt, (not intag and islong and (Count 0)));
case N of
xmlEOF: Exit;
xmlCloseTagLong:
begin
Result := True;
if islong then APos := nxt;
if (Text '') and (Count 0) then
begin
Text := exch(Text, #13#10#13#10, #13#10);
end;
Exit;
end;
xmlCloseTagShort:
begin
Result := (not IsLong) and intag;
if Result then APos := nxt;
Exit;
end;
xmlOpenTag:
begin
if islong then Result := AddChild.ParseXML(AData, APos)
else
begin
Result := False;
Exit;
end;
if not Result then Exit;
end;
xmlCloseTag:
begin
IsLong := True;
APos := nxt;
intag := False;
end;
xmlIdentifier:
begin
if intag then parsevalue(AData, APos)
else
Text := Text + ParseString(AData, APos, True)
end;
xmlUnknown:
begin
Result := True;
Exit;
end;
end;
end;
end;
//------------------------------------------------------------------------------
procedure TYZXMLTag.SetName(const Value: string);
begin
FName := Value;
end;
//------------------------------------------------------------------------------
procedure TYZXMLTag.SetValue(AName: string; const Value: string);
begin
FData[AName] := Value;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.SkipSpaces(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): Char;
var
L: Integer;
P: Char;
begin
L := Length(AData);
while APos = L do
begin
P := AData[APos];
if Ord(p) 32 then
begin
Result := p;
Exit;
end
else if rememberbreaks then
begin
if Pos(p, #13#9' ') 0 then
Text := Text + ' ';
end;
Inc(APos);
end;
Result := #0;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.CharAt(var S: string; APos: Integer): Char;
begin
Result := #0;
if (Length(s) ) or (apos 1) then Exit;
Result := s[APos];
end;
//------------------------------------------------------------------------------
function TYZXMLTag.WhatNext(var AData: string; var APos: Integer;
var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;
var
s: string;
C: Char;
P: Integer;
begin
Result := xmlEOF;
P := APos;
C := SkipSpaces(AData, APos);
P := APos;
ANext := P;
if C = #0 then Exit;
if C = ' then if CharAt(AData, P + 1) = '/' then
begin
Inc(P, 2);
s := parsestring(AData, P);
if (uppercase(s) = uppercase(FName)) and (SkipSpaces(AData, P) = '') then
begin
ANext := P + 1;
Result := xmlCloseTagLong;
Exit;
end
else
begin
if TagNameExists(s) then
begin
Result := xmlCloseTagLong;
ANext := APos;
Exit;
end;
ANext := P + 1;
Result := xmlCloseTagLong;
Exit;
end;
end;
if C = ' then
begin
ANext := P + 1;
Result := xmlOpenTag;
Exit;
end;
if C = '' then
begin
ANext := P + 1;
Result := xmlCloseTag;
Exit;
end;
if C = '/' then if CharAt(AData, P + 1) = '' then
begin
ANext := P + 2;
Result := xmlCloseTagShort;
Exit;
end;
ANext := P;
parsestring(AData, ANext);
Result := xmlIdentifier;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GetValueNames: strarr;
begin
Result := FData.Keys;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GenerateXML(var AData: string; ATab: string = ''): Boolean;
var
valDelimiter: string;
spc: string;
i: Integer;
a: strarr;
begin
spc := ATab + #9;
if FData.Count 5 then valDelimiter := ' '
else
valDelimiter := #13#10 + spc;
AData := AData + #13#10 + ATab + ' + FName;
a := FData.keys;
for i := 0 to Length(a) - 1 do
begin
AData := AData + valDelimiter + a[i] + ' = "' + EnQuote(values[a[i]]) + '"';
end;
if (Count 0) or (Text '') then
begin
AData := AData + '' + Text;
for i := 0 to Count - 1 do
begin
Children[i].GenerateXML(AData, ATab + #9);
end;
AData := AData + #13#10 + ATab + '';
end
else
AData := AData + '/';
Result := True;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.TagNameExists(AName: string): Boolean;
begin
Result := AnsiUpperCase(AName) = AnsiUpperCase(Self.FName);
if Self.FParent = nil then Exit;
if not Result then Result := fparent.TagNameExists(AName);
end;
//==============================================================================
{ TYZXMLParser }
constructor TYZXMLParser.Create;
begin
Header := TYZHash.Create;
inherited Create(nil);
end;
//------------------------------------------------------------------------------
destructor TYZXMLParser.Destroy;
begin
inherited;
Header.Destroy;
end;
//------------------------------------------------------------------------------
procedure TYZXMLParser.BuildTreeView(ATreeView: TTreeView);
var
i: Integer;
begin
// clear;
ATreeView.Items.Clear;
for i := 0 to Count - 1 do _BuildTreeView(ATreeView, nil, children[i]);
end;
//------------------------------------------------------------------------------
procedure TYZXMLParser._BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode;
ATag: TYZXMLTag);
var
i: Integer;
N: TTreeNode;
begin
N := ATreeView.Items.AddChildObject(ANode, ATag.Name + ' ' + FData['ID'], Pointer(ATag));
for i := 0 to ATag.Count - 1 do
begin
if ATag.children[i] nil then _BuildTreeView(ATreeView, N, ATag.children[i])
else
ATreeView.Items.AddChild(N, 'nil');
end;
N.Expanded := True;
end;
//------------------------------------------------------------------------------
function TYZXMLParser.Parse(AData: string): Boolean;
var
x1, x2, X, i: Integer;
s: string;
tmp: TYZXMLTag;
a: strarr;
N: TYZXMLMarker;
begin
X := 1;
Self.SkipSpaces(AData, X);
x2 := -1;
Result := False;
Clear;
Header.Clear;
x1 := Pos(', AData);
if x1 = X then
begin
x2 := Pos('?', AData);
if x2 then Exit;
s := uppercase(Copy(AData, x1 + 2, 4));
if Pos('XML ', s) 1 then Exit;
s := ' + Copy(AData, x1 + 6, x2 - x1 - 6) + '/';
tmp := TYZXMLTag.Create(nil);
tmp.ParseXML(s, x);
a := tmp.ValueNames;
for i := 0 to Length(a) - 1 do
Header[a[i]] := tmp.Values[a[i]];
tmp.Destroy;
x := x2 + 2;
end;
Result := True;
repeat
N := whatnext(AData, X, x1);
case N of
xmlOpenTag: Result := Result and AddChild.ParseXML(AData, X);
xmlIdentifier:
begin
if Text '' then Text := Text + ' ';
Text := Text + parsestring(AData, X, True);
end;
else
Parsestring(AData, X);
end;
until skipspaces(adata, x) = #0;
// if not result then ShowMessage('Error Parsing: '+inttostr(X));
end;
function TYZXMLParser.Generate(var AData: string): Boolean;
var
i: Integer;
a: strarr;
begin
Header['Date'] := DateTimeToStr(now);
a := header.Keys;
AData := ';
for i := 0 to Length(a) - 1 do
AData := AData + ' ' + a[i] + '="' + Header[a[i]] + '"';
AData := AData + '?'#13#10 + Text;
Result := True;
for i := 0 to Length(children) - 1 do
begin
Result := Result and children[i].generatexml(AData);
end;
end;
//==============================================================================
// procedures of THash class
//==============================================================================
{THASH CLASS}
procedure THash.Clear;
begin
SetLength(Arr, 0);
end;
constructor THash.Create;
begin
inherited;
Clear;
end;
//------------------------------------------------------------------------------
destructor THash.Destroy;
begin
Clear;
inherited;
end;
//------------------------------------------------------------------------------
function THash.GetCount: Integer;
begin
Result := Length(Arr);
end;
//------------------------------------------------------------------------------
function THash.Getempty: Boolean;
begin
Result := Length(Arr) = 0;
end;
function THash.GetKeys: StrArr;
var
i: Integer;
begin
SetLength(Result, Length(arr));
for i := 0 to Length(Result) - 1 do
Result[i] := arr[i].Key;
end;
//------------------------------------------------------------------------------
function THash.GetValue(Key: string): string;
var
i: Integer;
r: Boolean;
begin
Result := '';
i := 0;
r := False;
while (i Length(Arr)) and (not r) do
begin
if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then
begin
Result := Arr[i].Value;
r := True;
end;
i := i + 1;
end;
end;
//------------------------------------------------------------------------------
function THash.GetValues: StrArr;
var
i: Integer;
begin
SetLength(Result, Length(arr));
for i := 0 to Length(Result) - 1 do
Result[i] := arr[i].Value;
end;
//------------------------------------------------------------------------------
procedure THash.SetValue(Key: string; const VValue: string);
var
i, j: Integer;
r: Boolean;
E: THashElementArr;
begin
if VValue '' then
begin
i := 0;
r := False;
while (i Length(Arr)) and not r do
begin
if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then
begin
Arr[i].Value := VValue;
r := True;
end;
i := i + 1;
end;
if not r then
begin
SetLength(Arr, Length(arr) + 1);
arr[Length(arr) - 1].Key := Key;
arr[Length(arr) - 1].Value := Vvalue;
end;
end;
SetLength(E, Length(Arr));
for i := 0 to Length(arr) - 1 do E[i] := Arr[i];
SetLength(arr, 0);
for i := 0 to Length(E) - 1 do if (E[i].Key '') and (E[i].Value '') then
begin
j := Length(arr);
setlength(arr, j + 1);
arr[j] := E[i];
end;
end;
end.